framework,version,device,op_name,kernel_source,gemm_dtype,m,n,k,latency
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,10240,12.818304061889648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,10240,13.652864456176758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,12288,15.48249626159668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,12288,14.786239624023438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,16384,23.83465576171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,16384,18.615264892578125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,8192,9.43830394744873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,8192,10.489664077758789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,12288,12.401151657104492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,7168,9.036031723022461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,7168,9.534015655517578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,6144,7.17519998550415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,6144,7.46889591217041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,8192,8.503935813903809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,10240,10.510272026062012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,16384,16.324735641479492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,5120,6.0977277755737305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,5120,6.628799915313721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,4096,4.9549760818481445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,7168,7.467423915863037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,4096,5.150239944458008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,3584,4.377888202667236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,6144,6.536575794219971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,3584,4.512063980102539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,3072,3.772671937942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,3072,3.9084160327911377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,4096,4.494592189788818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,5120,5.4924798011779785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,2560,3.3153278827667236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,2048,2.643615961074829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,2560,3.218143939971924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,2048,2.694272041320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,3584,4.065887928009033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,3072,3.5061440467834473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,1536,2.123136043548584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,1536,2.1064319610595703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,1024,1.525823950767517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,1024,1.541632056236267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,2560,3.001568078994751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,512,0.94268798828125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,768,1.2243839502334595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,768,1.2419840097427368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,2048,2.504096031188965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,512,0.9453439712524414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,1536,2.013792037963867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,1024,1.5235199928283691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,256,0.6005759835243225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,256,0.5977280139923096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,128,0.45449599623680115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,128,0.43001601099967957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,768,1.2791359424591064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,64,0.43510401248931885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,64,0.41148799657821655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,65536,32,0.4233280122280121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,65536,32,0.4171519875526428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,256,0.7613120079040527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,128,0.5916479825973511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,512,1.0247999429702759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,64,0.5071039795875549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,65536,32,0.5096960067749023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,12288,3.6196799278259277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,12288,3.5096640586853027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,16384,4.813759803771973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,16384,4.639520168304443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,10240,2.950432062149048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,10240,3.0555200576782227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,16384,4.21727991104126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,8192,2.382528066635132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,12288,3.1770880222320557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,8192,2.474047899246216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,10240,2.6722559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,7168,2.1666879653930664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,8192,2.1812479496002197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,7168,2.102047920227051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,6144,1.8277759552001953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,7168,1.9160319566726685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,5120,1.548192024230957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,6144,1.8852800130844116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,5120,1.580672025680542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,4096,1.252351999282837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,4096,1.2878719568252563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,6144,1.6763839721679688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,4096,1.1561599969863892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,3584,1.1461440324783325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,3584,1.1117119789123535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,5120,1.4219199419021606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,3072,0.9777920246124268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,3584,1.0351359844207764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,3072,1.0173439979553223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,2560,0.84169602394104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,2560,0.8498560190200806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,3072,0.9062719941139221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,2048,0.6836479902267456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,2048,0.6994879841804504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,1536,0.5322239995002747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,1536,0.5492479801177979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,2560,0.7755200266838074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,1536,0.5195199847221375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,1024,0.3943359851837158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,1024,0.4005120098590851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,2048,0.647711992263794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,1024,0.3967680037021637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,768,0.32041600346565247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,768,0.33212798833847046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,512,0.2521600127220154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,768,0.3314560055732727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,256,0.1539520025253296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,65536,22.53593635559082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,512,0.26441600918769836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,128,0.1141119971871376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,256,0.15510399639606476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,512,0.2471040040254593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,256,0.19673599302768707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,128,0.1093439981341362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,64,0.1098880022764206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,64,0.1103999987244606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,128,0.15324799716472626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,32,0.11184000223875046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,16384,32,0.11446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,64,0.13264000415802002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,32,0.13334399461746216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,16384,65536,22.936447143554688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,16384,3.6213440895080566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,16384,3.51311993598938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,12288,2.6594879627227783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,16384,3.1403839588165283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,12288,2.722048044204712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,10240,2.289952039718628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,12288,2.384320020675659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,10240,2.2158079147338867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,10240,2.00764799118042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,8192,1.7964799404144287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,8192,1.8685439825057983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,8192,1.6253440380096436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,7168,1.5836800336837769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,7168,1.6454399824142456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,6144,1.384992003440857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,7168,1.4387840032577515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,6144,1.4150400161743164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,16384,65536,18.016864776611328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,6144,1.2492799758911133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,5120,1.1610560417175293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,5120,1.1987199783325195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,4096,0.9456639885902405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,65536,16.677215576171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,4096,0.9820160269737244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,3584,0.849727988243103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,5120,1.0603519678115845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,3584,0.8748480081558228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,4096,0.8685439825057983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,3072,0.7664639949798584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,3072,0.7409600019454956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,3584,0.7782719731330872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,2560,0.6282560229301453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,3072,0.6841920018196106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,2048,0.5150079727172852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,2560,0.6494399905204773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,2048,0.528768002986908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,1536,0.40809598565101624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,1536,0.4166080057621002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,2560,0.5876160264015198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,2048,0.4891520142555237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,1024,0.30035200715065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,1536,0.3964160084724426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,1024,0.3073599934577942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,768,0.2460159957408905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,768,0.25356799364089966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,512,0.18854400515556335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,1024,0.30191999673843384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,512,0.19494399428367615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,768,0.2542400062084198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,256,0.11494400352239609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,512,0.20496000349521637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,256,0.12064000219106674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,128,0.08828800171613693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,256,0.14963200688362122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,128,0.08460800349712372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,64,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,64,0.08422400057315826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,128,0.11638399958610535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,64,0.10182400047779083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,32,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,12288,32,0.0902400016784668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,32,0.10188800096511841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,12288,65536,16.123104095458984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,12288,2.229696035385132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,16384,3.0309441089630127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,16384,2.9222400188446045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,12288,2.315648078918457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,16384,2.616192102432251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,10240,1.857632040977478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,12288,1.9880640506744385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,10240,1.9148160219192505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,8192,1.505568027496338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,12288,65536,14.069279670715332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,8192,1.5578240156173706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,10240,1.674239993095398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,7168,1.3243520259857178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,7168,1.3742400407791138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,8192,1.3547199964523315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,6144,1.190943956375122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,6144,1.1577600240707397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,6144,1.041856050491333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,7168,1.1996480226516724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,5120,0.9815359711647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,5120,1.0141119956970215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,5120,0.8846079707145691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,4096,0.7990080118179321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,4096,0.8285120129585266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,3584,0.7157440185546875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,4096,0.7308160066604614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,3584,0.727840006351471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,3072,0.6181759834289551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,65536,13.90719985961914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,3584,0.6488000154495239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,3072,0.6434879899024963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,2560,0.5543680191040039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,2560,0.5260159969329834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,2048,0.4474560022354126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,2048,0.4331200122833252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,3072,0.5714880228042603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,1536,0.34300801157951355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,2560,0.4903680086135864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,2048,0.4097599983215332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,1024,0.2519040107727051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,1024,0.259552001953125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,1536,0.3569920063018799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,1024,0.2515200078487396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,1536,0.33161601424217224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,768,0.21439999341964722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,768,0.20668800175189972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,512,0.16099199652671814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,768,0.21324799954891205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,512,0.16492800414562225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,256,0.09590400010347366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,256,0.1003199964761734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,512,0.17174400389194489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,256,0.1260800063610077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,128,0.07459200173616409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,128,0.0721919983625412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,64,0.07235199958086014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,128,0.09824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,64,0.0727040022611618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,32,0.07654400169849396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,64,0.08560000360012054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,10240,32,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,32,0.08607999980449677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,10240,65536,13.145183563232422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,12288,1.8226879835128784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,16384,2.4673280715942383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,16384,2.389024019241333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,16384,2.0950400829315186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,12288,1.8652479648590088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,10240,1.5326080322265625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,12288,1.5909440517425537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,8192,1.2387839555740356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,10240,1.578752040863037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,10240,65536,12.482784271240234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,8192,1.280351996421814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,10240,1.3500479459762573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,7168,1.1006720066070557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,8192,1.0928640365600586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,6144,0.9451199769973755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,7168,1.1270719766616821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,6144,0.97462397813797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,7168,0.9605759978294373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,5120,0.8197439908981323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,5120,0.7987200021743774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,5120,0.7090240120887756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,6144,0.8417279720306396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,4096,0.6552320122718811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,4096,0.6711680293083191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,3072,0.5139200091362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,65536,11.655872344970703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,4096,0.582368016242981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,3584,0.5805760025978088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,3584,0.6011520028114319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,3072,0.521664023399353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,2560,0.42950400710105896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,3584,0.5209919810295105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,2560,0.44377601146698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,2048,0.3567360043525696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,3072,0.4574719965457916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,2560,0.3956800103187561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,2048,0.3672960102558136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,1536,0.27487999200820923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,1536,0.28271999955177307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,2048,0.32787200808525085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,1024,0.20633600652217865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,1024,0.20236800611019135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,1024,0.20127999782562256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,1536,0.2656640112400055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,512,0.13385599851608276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,768,0.16438399255275726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,768,0.1693120002746582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,256,0.0796160027384758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,768,0.171424001455307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,512,0.13276800513267517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,256,0.08166400343179703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,512,0.13593600690364838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,256,0.1029760017991066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,128,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,65536,11.31004810333252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,128,0.05958399921655655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,64,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,128,0.08012799918651581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,64,0.06032000109553337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,8192,32,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,8192,32,0.06259199976921082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,64,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,32,0.07062400132417679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,12288,1.5977280139923096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,16384,2.225951910018921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,16384,2.093503952026367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,12288,1.63481605052948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,16384,1.8342080116271973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,10240,1.3315199613571167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,10240,1.3803199529647827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,8192,65536,9.56230354309082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,12288,1.3935680389404297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,8192,1.0783679485321045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,8192,1.1203199625015259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,7168,0.9642879962921143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,7168,0.9955840110778809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,10240,1.1807359457015991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,8192,0.9498559832572937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,6144,0.8611199855804443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,6144,0.8283200263977051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,6144,0.7375040054321289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,7168,0.8415679931640625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,5120,0.7028800249099731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,5120,0.7184000015258789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,4096,0.5684159994125366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,5120,0.6202239990234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,4096,0.5902400016784668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,3584,0.5079039931297302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,4096,0.5095999836921692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,3584,0.5238720178604126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,3072,0.4453760087490082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,3072,0.4589119851589203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,65536,10.512831687927246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,3584,0.45606398582458496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,2560,0.3786880075931549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,2560,0.39187198877334595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,2048,0.31356799602508545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,3072,0.40169599652290344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,2560,0.3461120128631592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,2048,0.3238399922847748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,1536,0.24323199689388275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,1536,0.25171199440956116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,1024,0.17865599691867828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,1024,0.18569600582122803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,2048,0.28883200883865356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,1536,0.23388800024986267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,1024,0.17696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,768,0.14524799585342407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,65536,9.960736274719238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,512,0.1143679991364479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,768,0.15081599354743958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,512,0.1175680011510849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,768,0.1510400027036667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,256,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,128,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,256,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,512,0.1207680031657219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,128,0.052799999713897705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,256,0.09107200056314468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,128,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,64,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,64,0.053599998354911804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,7168,32,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,64,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,7168,32,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,32,0.0629120022058487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,16384,1.8001919984817505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,12288,1.3680319786071777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,12288,1.401792049407959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,16384,1.8675199747085571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,10240,1.1427520513534546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,16384,1.5849920511245728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,10240,1.1872960329055786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,8192,0.9256640076637268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,12288,1.1953279972076416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,8192,0.9619519710540771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,7168,65536,8.616031646728516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,7168,0.8300480246543884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,8192,0.814624011516571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,10240,1.0058879852294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,7168,0.8490880131721497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,6144,0.7178239822387695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,5120,0.6041280031204224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,6144,0.7368959784507751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,7168,0.7225599884986877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,5120,0.6225280165672302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,6144,0.6276479959487915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,4096,0.4918400049209595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,4096,0.5054399967193604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,3584,0.43724799156188965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,5120,0.5330560207366943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,3584,0.4511680006980896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,4096,0.4370880126953125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,65536,8.570560455322266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,3584,0.39187198877334595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,3072,0.38473600149154663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,3072,0.39743998646736145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,2560,0.33478400111198425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,2048,0.2717120051383972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,2048,0.2787199914455414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,3072,0.3449600040912628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,2560,0.2964479923248291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,2560,0.3251839876174927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,1536,0.21119999885559082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,1536,0.21667200326919556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,2048,0.24729600548744202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,1024,0.1552319973707199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,1536,0.20127999782562256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,1024,0.15884800255298615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,768,0.1266240030527115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,65536,8.293536186218262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,768,0.1297920048236847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,1024,0.1528639942407608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,512,0.09942399710416794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,512,0.10364799946546555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,768,0.1295360028743744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,256,0.062272001057863235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,256,0.06339199841022491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,512,0.10412800312042236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,128,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,256,0.0785600021481514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,128,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,128,0.06195199862122536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,64,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,64,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,6144,32,0.0530879981815815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,64,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,6144,32,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,32,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,65536,7.146687984466553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,12288,1.1368639469146729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,16384,1.558303952217102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,16384,1.500991940498352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,12288,1.171231985092163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,16384,1.3114880323410034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,10240,0.9821119904518127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,10240,0.9605439901351929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,12288,0.9973440170288086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,8192,0.8030719757080078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,8192,0.781823992729187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,6144,65536,7.341472148895264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,7168,0.686847984790802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,10240,0.8403840065002441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,6144,0.5943040251731873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,8192,0.6872320175170898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,7168,0.7084479928016663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,5120,0.5107200145721436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,6144,0.6146559715270996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,5120,0.5232319831848145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,7168,0.6044480204582214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,6144,0.5243840217590332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,4096,0.41497600078582764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,4096,0.4249599874019623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,5120,0.4461440145969391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,3584,0.3684160113334656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,4096,0.3659839928150177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,3584,0.3771519958972931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,3072,0.32310399413108826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,3072,0.33024001121520996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,2560,0.2776319980621338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,3584,0.32787200808525085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,3072,0.28803199529647827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,2560,0.2829760015010834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,2048,0.2274560034275055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,2048,0.23296000063419342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,65536,7.291296005249023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,2560,0.2486400008201599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,1536,0.17708800733089447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,1536,0.18131199479103088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,1024,0.12988799810409546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,2048,0.20793600380420685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,1024,0.13417600095272064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,1536,0.16847999393939972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,768,0.10742399841547012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,1024,0.12809599936008453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,768,0.11075200140476227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,512,0.08419200032949448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,512,0.0872960016131401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,768,0.10924799740314484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,256,0.053247999399900436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,512,0.08780799806118011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,256,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,256,0.06601600348949432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,128,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,128,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,128,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,64,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,64,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,64,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,5120,32,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,5120,32,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,32,0.046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,16384,1.2475199699401855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,16384,1.2003519535064697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,12288,0.913919985294342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,16384,1.0501439571380615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,12288,0.9372159838676453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,10240,0.7716159820556641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,12288,0.7987520098686218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,10240,0.7943040132522583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,8192,0.624288022518158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,5120,65536,6.1565117835998535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,10240,0.6787199974060059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,8192,0.6439039707183838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,7168,0.5603520274162292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,8192,0.5449600219726562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,7168,0.5642880201339722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,6144,0.48425599932670593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,6144,0.4954560101032257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,7168,0.4873279929161072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,5120,0.4052479863166809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,5120,0.42259201407432556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,65536,6.125152111053467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,6144,0.4206719994544983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,4096,0.33900800347328186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,4096,0.3309760093688965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,5120,0.35974401235580444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,4096,0.29577600955963135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,3584,0.2969279885292053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,65536,5.94271993637085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,3584,0.30396801233291626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,3072,0.2575039863586426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,3072,0.2670400142669678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,3584,0.2624639868736267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,2560,0.22102400660514832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,2560,0.2282560020685196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,2048,0.18329599499702454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,3072,0.2311359941959381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,2560,0.19961600005626678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,1536,0.14448000490665436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,2048,0.18911999464035034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,1536,0.1467839926481247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,2048,0.16601599752902985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,1024,0.10547199845314026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,1024,0.10790400207042694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,768,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,1024,0.10304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,1536,0.1355839967727661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,768,0.08947200328111649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,512,0.06918399780988693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,512,0.07081600278615952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,768,0.0872960016131401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,256,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,256,0.045184001326560974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,512,0.07107199728488922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,256,0.054496001452207565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,128,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,128,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,64,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,128,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,64,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,64,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,4096,32,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,4096,32,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,32,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,16384,1.0559680461883545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,12288,0.7949439883232117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,16384,1.0794880390167236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,12288,0.8176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,4096,65536,5.025536060333252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,16384,0.9289600253105164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,10240,0.688543975353241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,10240,0.6671680212020874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,12288,0.7004799842834473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,7168,0.47708800435066223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,8192,0.5440639853477478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,8192,0.5569919943809509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,10240,0.5943679809570312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,8192,0.4779840111732483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,7168,0.4915199875831604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,6144,0.4151360094547272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,5120,0.35075199604034424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,6144,0.4216639995574951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,7168,0.42767998576164246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,5120,0.36025598645210266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,6144,0.36902400851249695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,4096,0.28812798857688904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,4096,0.2948479950428009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,5120,0.31353598833084106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,3584,0.2624639868736267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,3584,0.25600001215934753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,65536,5.222976207733154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,3072,0.22256000339984894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,4096,0.2577599883079529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,3584,0.23119999468326569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,3072,0.22899200022220612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,2560,0.191103994846344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,2560,0.19388799369335175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,65536,4.902400016784668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,3072,0.20371200144290924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,2560,0.17564800381660461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,2048,0.15769599378108978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,2048,0.16339200735092163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,1024,0.09536000341176987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,1536,0.12591999769210815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,1536,0.12956799566745758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,2048,0.14652800559997559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,1536,0.11977600306272507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,768,0.08032000064849854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,1024,0.09737599641084671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,768,0.08086399734020233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,512,0.061983998864889145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,1024,0.09071999788284302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,512,0.06415999680757523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,256,0.04073600098490715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,512,0.0628800019621849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,256,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,128,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,256,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,128,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,64,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,128,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,64,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3584,32,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,768,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,64,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3584,32,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,32,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,12288,0.6823359727859497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,16384,0.9335039854049683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,16384,0.907263994216919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,12288,0.7004479765892029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,16384,0.79094398021698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3584,65536,4.488959789276123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,10240,0.5760639905929565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,12288,0.6014080047607422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,10240,0.5856959819793701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,8192,0.4675520062446594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,8192,0.4832319915294647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,10240,0.5071039795875549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,7168,0.4123840034008026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,7168,0.42185598611831665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,8192,0.4139519929885864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,6144,0.35708799958229065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,7168,0.3648639917373657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,6144,0.3633280098438263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,5120,0.3047359883785248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,4096,0.24748800694942474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,6144,0.3197439908981323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,5120,0.30768001079559326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,65536,4.431359767913818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,5120,0.26947200298309326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,65536,4.211008071899414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,4096,0.25545600056648254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,3584,0.22547200322151184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,3584,0.21820800006389618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,3072,0.19222399592399597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,3584,0.19840000569820404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,4096,0.2218559980392456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,3072,0.19631999731063843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,2560,0.16662399470806122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,3072,0.1756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,2560,0.16809600591659546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,2048,0.13715200126171112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,2048,0.13967999815940857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,1536,0.11049599945545197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,2560,0.15142400562763214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,2048,0.12716799974441528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,1536,0.1120000034570694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,1536,0.10284800082445145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,1024,0.08204799890518188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,1024,0.08336000144481659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,768,0.06844799965620041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,1024,0.0785600021481514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,768,0.07184000313282013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,768,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,512,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,512,0.056832000613212585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,512,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,256,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,256,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,256,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,128,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,128,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,64,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,128,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,64,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,64,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,3072,32,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,3072,32,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,32,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,12288,0.5754240155220032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,16384,0.7514240145683289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,16384,0.7819839715957642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,12288,0.5856959819793701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,3072,65536,3.77126407623291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,10240,0.4829759895801544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,12288,0.502560019493103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,16384,0.6650239825248718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,8192,0.38979199528694153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,10240,0.4975680112838745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,7168,0.34563198685646057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,8192,0.40489599108695984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,10240,0.4240640103816986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,7168,0.3534719944000244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,8192,0.34668800234794617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,6144,0.30291199684143066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,6144,0.30585598945617676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,5120,0.25407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,6144,0.2654080092906952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,5120,0.26015999913215637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,4096,0.2093760073184967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,7168,0.3054080009460449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,5120,0.2261119931936264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,4096,0.2130880057811737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,4096,0.18665599822998047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,3584,0.18745599687099457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,65536,3.696000099182129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,3584,0.18320000171661377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,3072,0.1605439931154251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,3584,0.16684800386428833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,65536,3.6887359619140625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,3072,0.1653120070695877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,2560,0.13996799290180206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,2560,0.14163200557231903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,3072,0.14732800424098969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,2048,0.11660800129175186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,2560,0.12723200023174286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,2048,0.11875200271606445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,1536,0.09331200271844864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,1536,0.09561599791049957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,2048,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,1024,0.06940799951553345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,1536,0.08700799942016602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,1024,0.07267200201749802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,768,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,768,0.06032000109553337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,1024,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,768,0.05660799890756607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,512,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,512,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,512,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,256,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,256,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,256,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,128,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,128,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,128,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,64,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,64,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,64,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2560,32,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2560,32,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,32,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,12288,0.46025601029396057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,16384,0.6154239773750305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,16384,0.6020159721374512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2560,65536,2.9659199714660645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,12288,0.47088000178337097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,16384,0.53302401304245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,10240,0.38710400462150574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,10240,0.396479994058609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,12288,0.40220800042152405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,8192,0.31753599643707275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,8192,0.324864000082016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,10240,0.3394879996776581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,7168,0.27561599016189575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,8192,0.27827200293540955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,7168,0.28464001417160034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,6144,0.24211199581623077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,6144,0.24624000489711761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,7168,0.24617600440979004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,5120,0.20307199656963348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,6144,0.21359999477863312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,5120,0.20812800526618958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,65536,3.058527946472168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,4096,0.16742399334907532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,4096,0.1717119961977005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,65536,2.949631929397583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,5120,0.18143999576568604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,4096,0.14908799529075623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,3584,0.1494400054216385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,3584,0.1518079936504364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,3072,0.13276800513267517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,3072,0.13014400005340576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,3584,0.1345919966697693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,2560,0.11187200248241425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,2560,0.11420799791812897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,3072,0.11875200271606445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,2048,0.0939520001411438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,2048,0.09619200229644775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,2560,0.10185600072145462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,2048,0.08550400286912918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,1536,0.0753600001335144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,1536,0.07699199765920639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,1536,0.070592001080513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,1024,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,1024,0.059007998555898666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,1024,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,768,0.04848000034689903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,768,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,512,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,768,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,512,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,512,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,256,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,256,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,256,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,128,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,128,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,64,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,128,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,2048,32,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,64,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,2048,32,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,32,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,16384,0.4591040015220642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,64,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,2048,65536,2.466048002243042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,16384,0.4692800045013428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,12288,0.34809601306915283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,12288,0.3537920117378235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,16384,0.3985599875450134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,12288,0.30342400074005127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,10240,0.2913919985294342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,10240,0.30137598514556885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,8192,0.23868800699710846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,10240,0.2575039863586426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,8192,0.24569599330425262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,7168,0.21062399446964264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,8192,0.20947200059890747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,7168,0.21635200083255768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,6144,0.1834239959716797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,7168,0.1857919991016388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,6144,0.18716800212860107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,5120,0.15583999454975128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,6144,0.16140800714492798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,65536,2.1903998851776123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,5120,0.16016000509262085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,4096,0.1287039965391159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,65536,2.147615909576416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,4096,0.13123199343681335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,5120,0.13820800185203552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,3584,0.11401599645614624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,4096,0.11350400000810623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,3584,0.11692799627780914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,3072,0.10201600193977356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,3584,0.1027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,3072,0.10265599936246872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,2560,0.08579199761152267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,2560,0.08816000074148178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,3072,0.09094399958848953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,2048,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,2048,0.07273600250482559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,2560,0.07823999971151352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,2048,0.06592000275850296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,1536,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,1536,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,1536,0.054207999259233475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,1024,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,768,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,1024,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,1024,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,768,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,768,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,512,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,512,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,256,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,512,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,256,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,256,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,128,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,128,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,64,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,128,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,64,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,64,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,65536,1.9085760116577148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1536,32,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1536,32,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1536,32,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,12288,0.234592005610466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,12288,0.23862400650978088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,16384,0.3136639893054962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,16384,0.31942400336265564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,16384,0.26927998661994934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,12288,0.20390400290489197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,10240,0.19862399995326996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,65536,1.470047950744629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,10240,0.20297600328922272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,10240,0.1733119934797287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,8192,0.16438399255275726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,8192,0.16153599321842194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,7168,0.14681600034236908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,7168,0.14441600441932678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,8192,0.14079999923706055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,6144,0.12371200323104858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,6144,0.12886400520801544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,7168,0.12703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,5120,0.10979200154542923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,5120,0.1074879989027977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,4096,0.08793599903583527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,6144,0.11075200140476227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,5120,0.09353599697351456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,65536,1.597983956336975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,3584,0.08102399855852127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,4096,0.09100800007581711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,4096,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,3584,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,3072,0.07116799801588058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,2560,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,3584,0.06963200122117996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,3072,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,2560,0.061824001371860504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,2560,0.05430399999022484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,2048,0.050944000482559204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,2048,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,2048,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,1536,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,1536,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,1536,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,1024,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,768,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,1024,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,3072,0.07215999811887741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,1024,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,768,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,768,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,65536,1.2367680072784424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,512,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,512,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,256,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,256,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,512,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,256,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,128,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,128,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,128,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,64,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,64,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,64,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,1024,32,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,1024,32,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,1024,32,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,12288,0.18518400192260742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,16384,0.24022400379180908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,12288,0.18905599415302277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,16384,0.24662399291992188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,12288,0.2038400024175644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,16384,0.26953598856925964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,10240,0.15942400693893433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,10240,0.1626880019903183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,10240,0.17254400253295898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,8192,0.12771199643611908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,8192,0.13331200182437897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,7168,0.11472000181674957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,8192,0.14006400108337402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,7168,0.11817599833011627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,7168,0.12595200538635254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,6144,0.10179200023412704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,6144,0.09955199807882309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,6144,0.1098880022764206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,65536,1.0817919969558716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,65536,1.0688320398330688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,5120,0.08495999872684479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,5120,0.08720000088214874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,4096,0.06976000219583511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,5120,0.09324800223112106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,4096,0.07129599899053574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,3584,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,3584,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,4096,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,3072,0.05641600117087364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,3584,0.06960000097751617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,3072,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,3072,0.061824001371860504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,2560,0.05347200110554695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,2560,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,2560,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,2048,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,1536,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,2048,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,2048,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,65536,1.2610559463500977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,1536,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,1536,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,1024,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,1024,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,768,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,1024,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,768,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,768,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,512,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,256,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,512,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,256,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,512,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,128,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,256,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,128,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,64,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,128,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,64,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,768,32,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,768,32,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,768,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,12288,0.12204799801111221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,16384,0.1635199934244156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,16384,0.16502399742603302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,16384,0.13728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,12288,0.10704000294208527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,10240,0.10579200088977814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,10240,0.10684800148010254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,12288,0.12438400089740753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,8192,0.0872960016131401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,10240,0.09123200178146362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,8192,0.08921600133180618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,7168,0.07676800340414047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,8192,0.07475200295448303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,7168,0.0785600021481514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,7168,0.06700800359249115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,6144,0.06777600198984146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,65536,0.7674239873886108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,65536,0.7466239929199219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,6144,0.0689919963479042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,5120,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,6144,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,5120,0.06019200012087822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,5120,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,3584,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,4096,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,3584,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,4096,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,4096,0.05020799860358238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,3584,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,65536,0.6676480174064636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,3072,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,2560,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,3072,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,3072,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,2560,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,2560,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,2048,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,2048,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,2048,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,1536,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,1536,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,1024,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,1536,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,1024,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,1024,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,768,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,768,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,768,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,512,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,512,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,256,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,512,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,256,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,128,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,128,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,128,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,64,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,64,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,64,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,512,32,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,512,32,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,512,32,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,12288,0.08563199639320374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,12288,0.08524800091981888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,16384,0.10876800119876862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,16384,0.10841599851846695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,16384,0.0989760011434555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,12288,0.07705599814653397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,10240,0.0751039981842041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,10240,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,10240,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,8192,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,8192,0.06195199862122536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,8192,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,7168,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,7168,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,7168,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,65536,0.47679999470710754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,6144,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,65536,0.45772799849510193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,6144,0.047359999269247055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,5120,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,6144,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,5120,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,5120,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,4096,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,4096,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,65536,0.43929600715637207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,4096,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,3584,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,3584,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,3584,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,3072,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,3072,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,2560,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,2560,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,2560,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,3072,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,2048,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,2048,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,2048,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,1536,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,1536,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,1024,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,1536,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,1024,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,768,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,768,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,256,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,256,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,128,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,128,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,256,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,256,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,256,768,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,12288,0.08032000064849854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,12288,0.08137600123882294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,16384,0.10246399790048599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,16384,0.10147199779748917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,16384,0.062111999839544296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,12288,0.04931199923157692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,10240,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,10240,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,10240,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,8192,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,8192,0.0586559996008873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,7168,0.053599998354911804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,8192,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,7168,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,7168,0.05363199859857559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,65536,0.37327998876571655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,65536,0.3681600093841553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,6144,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,6144,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,6144,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,5120,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,5120,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,65536,0.25279998779296875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,5120,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,4096,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,4096,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,4096,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,3584,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,3584,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,3072,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,3072,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,2560,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,2560,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,2048,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,2048,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,1536,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,1536,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,1024,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,1024,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,256,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,128,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,128,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,128,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,12288,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,12288,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,16384,0.1029760017991066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,16384,0.10201600193977356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,16384,0.05923200026154518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,12288,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,10240,0.06883200258016586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,10240,0.06883200258016586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,8192,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,10240,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,8192,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,7168,0.054336000233888626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,7168,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,7168,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,6144,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,65536,0.37145599722862244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,65536,0.3709760010242462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,65536,0.20188799500465393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,8192,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,6144,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,5120,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,6144,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,5120,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,5120,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,3584,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,4096,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,4096,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,3584,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,3072,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,3584,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,3072,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,3072,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,2560,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,2560,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,2560,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,2048,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,2048,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,1536,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,1536,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,1024,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,1024,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,768,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,512,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,64,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,64,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,64,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,64,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,12288,0.07945600152015686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,12288,0.08048000186681747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,16384,0.10140799731016159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,16384,0.10198400169610977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,16384,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,12288,0.048928000032901764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,10240,0.06857600063085556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,8192,0.058848001062870026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,10240,0.06982400268316269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,10240,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,8192,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,8192,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,6144,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,7168,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,7168,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,7168,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,65536,0.36537599563598633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,65536,0.36953601241111755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,6144,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,6144,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,65536,0.2017280012369156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,5120,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,5120,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,5120,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,4096,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,4096,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,3584,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,3584,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,3072,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,3072,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,3072,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,2560,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,2560,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,2048,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,2048,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,1536,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,2048,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,1536,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,1024,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,1024,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,1024,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,768,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,768,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8192,32,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8192,32,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8192,32,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,10240,6.354911804199219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,10240,5.877503871917725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,12288,7.160160064697266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,12288,7.144063949584961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,16384,10.678175926208496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,16384,9.243904113769531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,8192,4.741055965423584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,8192,5.119967937469482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,12288,6.2521281242370605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,7168,4.160319805145264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,7168,4.483391761779785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,6144,3.6019198894500732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,8192,4.2887678146362305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,6144,3.834656000137329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,16384,8.232799530029297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,10240,5.258848190307617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,4096,2.5984320640563965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,5120,3.009471893310547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,5120,3.227231979370117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,4096,2.4456639289855957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,7168,3.766752004623413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,3072,1.8966399431228638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,3584,2.1652159690856934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,3584,2.302079916000366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,6144,3.2710399627685547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,3072,1.980031967163086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,2560,1.6059520244598389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,2560,1.6682560443878174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,4096,2.271967887878418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,5120,2.7725119590759277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,1536,1.0387519598007202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,2048,1.3304959535598755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,3584,2.0285439491271973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,2048,1.3649920225143433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,3072,1.776352047920227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,1536,1.0673279762268066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,1024,0.7801600098609924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,2560,1.5272959470748901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,1024,0.7633919715881348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,768,0.6176000237464905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,768,0.6350719928741455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,1536,1.0151679515838623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,512,0.48131200671195984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,512,0.47971200942993164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,256,0.2802239954471588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,2048,1.2862080335617065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,256,0.28566399216651917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,1024,0.7611520290374756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,128,0.22947199642658234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,128,0.21772800385951996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,64,0.2136639952659607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,64,0.21907199919223785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,512,0.5043839812278748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,768,0.6305919885635376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,65536,32,0.21503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,256,0.37932801246643066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,128,0.30275198817253113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,65536,32,0.21027199923992157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,64,0.26124799251556396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,65536,32,0.2622720003128052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,16384,2.4701120853424072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,16384,2.4047679901123047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,12288,1.8563200235366821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,12288,1.7994240522384644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,10240,1.5205119848251343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,12288,1.5911999940872192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,8192,1.2260799407958984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,10240,1.5652480125427246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,8192,1.262976050376892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,16384,2.111840009689331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,10240,1.3498879671096802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,7168,1.080064058303833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,7168,1.1126400232315063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,8192,1.0845119953155518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,6144,0.9691839814186096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,6144,0.9344000220298767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,7168,0.968608021736145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,5120,0.7822080254554749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,6144,0.8407679796218872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,5120,0.8104959726333618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,4096,0.6359040141105652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,5120,0.7083520293235779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,4096,0.6670719981193542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,3584,0.5702080130577087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,3072,0.4907839894294739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,4096,0.5852159857749939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,3584,0.5921279788017273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,3072,0.5055680274963379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,2560,0.42214399576187134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,3584,0.5193920135498047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,3072,0.45603200793266296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,2560,0.43718400597572327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,2048,0.35603201389312744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,2048,0.34806400537490845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,2560,0.39209601283073425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,1536,0.27395200729370117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,2048,0.3269439935684204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,1536,0.2831040024757385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,1024,0.20006400346755981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,1024,0.20572799444198608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,1536,0.26102399826049805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,768,0.16441600024700165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,768,0.1693439930677414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,1024,0.1952960044145584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,512,0.12912000715732574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,768,0.1632319986820221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,512,0.13209599256515503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,256,0.07075200229883194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,512,0.13196800649166107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,256,0.07558400183916092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,65536,11.55504035949707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,128,0.06102399900555611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,256,0.09932799637317657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,128,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,64,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,128,0.08156800270080566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,64,0.05907199904322624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,64,0.07206399738788605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,32,0.0655680000782013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,16384,32,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,32,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,16384,65536,11.180607795715332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,65536,8.486495971679688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,16384,1.8562560081481934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,16384,1.8029760122299194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,12288,1.3624639511108398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,12288,1.5453120470046997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,10240,1.169983983039856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,16384,1.5850880146026611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,8192,0.9162560105323792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,10240,1.1432960033416748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,12288,1.1951040029525757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,8192,0.9593279957771301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,7168,0.8140479922294617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,10240,1.0060160160064697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,8192,0.8154879808425903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,7168,0.8470399975776672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,6144,0.7103040218353271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,6144,0.7281280159950256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,7168,0.7281280159950256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,5120,0.599839985370636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,5120,0.6119679808616638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,6144,0.62745600938797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,4096,0.48419201374053955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,4096,0.4984320104122162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,5120,0.5331199765205383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,3584,0.43113601207733154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,3584,0.443807989358902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,4096,0.4410240054130554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,3072,0.3745279908180237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,2560,0.3210560083389282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,3584,0.39107200503349304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,3072,0.3856959939002991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,16384,65536,8.511455535888672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,2560,0.32979199290275574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,3072,0.3465920090675354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,2048,0.27136000990867615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,2048,0.26310399174690247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,2560,0.2953599989414215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,1536,0.2083200067281723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,2048,0.2478400021791458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,1024,0.15379199385643005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,1536,0.2168319970369339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,1024,0.15827199816703796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,1536,0.19804799556732178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,768,0.13094399869441986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,768,0.12777599692344666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,512,0.10153599828481674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,1024,0.14854399859905243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,768,0.12620800733566284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,512,0.10239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,256,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,256,0.05990400165319443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,512,0.10153599828481674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,128,0.047520000487565994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,128,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,256,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,128,0.06323199719190598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,64,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,64,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,32,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,12288,32,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,64,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,32,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,12288,65536,7.01526403427124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,12288,1.1374399662017822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,16384,1.5381120443344116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,16384,1.5038399696350098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,12288,1.2740800380706787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,10240,0.9641280174255371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,16384,1.3116159439086914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,12288,0.997215986251831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,10240,0.975167989730835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,8192,0.7878400087356567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,12288,65536,6.861120223999023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,8192,0.7710400223731995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,7168,0.7011200189590454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,6144,0.5863360166549683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,10240,0.8462079763412476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,7168,0.6806399822235107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,8192,0.6856639981269836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,6144,0.6041920185089111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,5120,0.4979200065135956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,7168,0.6028159856796265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,6144,0.5290240049362183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,5120,0.5123199820518494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,4096,0.4089600145816803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,4096,0.42054399847984314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,3584,0.37059199810028076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,5120,0.4456639885902405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,3584,0.35731199383735657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,65536,6.5990400314331055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,3072,0.32419198751449585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,4096,0.36508798599243164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,3072,0.31251201033592224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,3584,0.3264000117778778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,2560,0.2672640085220337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,2560,0.2752000093460083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,2048,0.22070400416851044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,3072,0.28678399324417114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,2048,0.22777600586414337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,1536,0.17529599368572235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,1536,0.18057599663734436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,2560,0.24982400238513947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,1024,0.1337919980287552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,1024,0.12857599556446075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,2048,0.20758399367332458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,1536,0.1666879951953888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,768,0.1096000000834465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,768,0.11020799726247787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,1024,0.12591999769210815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,65536,7.661664009094238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,512,0.08537600189447403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,512,0.08745600283145905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,768,0.10496000200510025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,256,0.05056000128388405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,256,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,512,0.08515200018882751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,128,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,128,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,64,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,256,0.0655359998345375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,128,0.05430399999022484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,64,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,64,0.04851200059056282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,10240,32,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,10240,32,0.04566400125622749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,32,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,12288,0.9121279716491699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,16384,1.2047679424285889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,65536,4.864416122436523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,12288,0.9385920166969299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,16384,1.2314239740371704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,10240,0.7662720084190369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,12288,0.7983999848365784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,10240,0.7816320061683655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,16384,1.0498239994049072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,8192,0.6366080045700073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,7168,0.5625280141830444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,8192,0.6195200085639954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,10240,0.6730560064315796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,7168,0.5407360196113586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,10240,65536,5.2294721603393555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,8192,0.5489919781684875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,6144,0.4748480021953583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,6144,0.4887999892234802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,5120,0.40777599811553955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,7168,0.48761600255966187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,5120,0.40009599924087524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,4096,0.33484798669815063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,6144,0.4204480051994324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,3584,0.289792001247406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,3584,0.29600000381469727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,4096,0.32547199726104736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,3072,0.25990399718284607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,5120,0.3567360043525696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,3072,0.2517760097980499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,4096,0.2956160008907318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,3584,0.2630079984664917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,2560,0.21795199811458588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,3072,0.23059199750423431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,2560,0.2242559939622879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,2048,0.18009600043296814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,2048,0.18268799781799316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,2560,0.1985280066728592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,1536,0.14083200693130493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,1536,0.14547200500965118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,2048,0.16659200191497803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,1536,0.13388800621032715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,1024,0.10819199681282043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,1024,0.10099200159311295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,768,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,768,0.09110400080680847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,512,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,768,0.0851840004324913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,1024,0.1045759990811348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,512,0.07062400132417679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,256,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,512,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,256,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,128,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,128,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,256,0.053408000618219376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,64,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,64,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,128,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,32,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,8192,32,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,64,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,32,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,8192,65536,5.435647964477539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,65536,4.390304088592529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,12288,0.8054080009460449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,16384,1.047711968421936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,16384,1.2365440130233765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,10240,0.6941760182380676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,12288,0.8157439827919006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,16384,0.9275199770927429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,10240,0.6669440269470215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,12288,0.6999040246009827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,8192,0.5591040253639221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,8192,0.5435199737548828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,10240,0.5900800228118896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,8192,65536,4.254271984100342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,8192,0.4781759977340698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,7168,0.4790399968624115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,7168,0.49302399158477783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,6144,0.4185279905796051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,6144,0.42796799540519714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,7168,0.424127995967865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,5120,0.35075199604034424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,5120,0.36447998881340027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,4096,0.28575998544692993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,6144,0.3686079978942871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,4096,0.2948800027370453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,5120,0.31414398550987244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,3584,0.2547839879989624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,3584,0.2616640031337738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,4096,0.2595840096473694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,3072,0.22118400037288666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,3072,0.2284799963235855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,3584,0.23030400276184082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,65536,4.478367805480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,2560,0.19033600389957428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,3072,0.2046400010585785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,2560,0.19500799477100372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,2048,0.1570879966020584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,2048,0.1613759994506836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,1536,0.12566399574279785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,1536,0.12963199615478516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,2560,0.17507199943065643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,2048,0.14627200365066528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,1536,0.11760000139474869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,1024,0.09363199770450592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,1024,0.0963200032711029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,768,0.07919999957084656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,768,0.0809599980711937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,1024,0.08931200206279755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,512,0.06307200342416763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,512,0.06492800265550613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,768,0.07561600208282471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,256,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,512,0.06137600168585777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,256,0.037408001720905304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,128,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,128,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,256,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,128,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,64,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,64,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,64,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,7168,32,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,7168,32,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,32,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,12288,0.6822400093078613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,16384,0.9334400296211243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,16384,0.9069120287895203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,12288,0.7020800113677979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,16384,0.7948480248451233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,10240,0.5780159831047058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,10240,0.5904639959335327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,7168,65536,4.0702080726623535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,8192,0.46326398849487305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,12288,0.6000000238418579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,7168,0.412416011095047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,10240,0.510208010673523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,7168,0.4264639914035797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,8192,0.4099520146846771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,8192,0.48099198937416077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,6144,0.36723199486732483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,6144,0.3578239977359772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,5120,0.3033919930458069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,7168,0.36454400420188904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,6144,0.3165760040283203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,5120,0.31379199028015137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,4096,0.24639999866485596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,4096,0.25488001108169556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,3584,0.2202560007572174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,5120,0.27142399549484253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,65536,4.163167953491211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,3072,0.19145600497722626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,4096,0.22179199755191803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,3584,0.2269120067358017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,3072,0.1974720060825348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,2560,0.17004799842834473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,65536,3.542720079421997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,3584,0.1987520009279251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,3072,0.17468799650669098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,2560,0.16582399606704712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,1536,0.11247999966144562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,2048,0.13574400544166565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,2048,0.13884800672531128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,1536,0.10924799740314484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,2560,0.1515199989080429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,1536,0.10220800340175629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,2048,0.12668800354003906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,1024,0.0825280025601387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,1024,0.08345600217580795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,768,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,768,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,512,0.055424001067876816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,1024,0.07769600301980972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,768,0.06611199676990509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,512,0.05612799897789955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,512,0.05318399891257286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,256,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,128,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,256,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,256,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,128,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,128,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,64,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,64,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,6144,32,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,64,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,6144,32,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,32,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,12288,0.5734080076217651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,16384,0.7791360020637512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,16384,0.758400022983551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,6144,65536,3.06278395652771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,10240,0.47839999198913574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,12288,0.5867199897766113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,10240,0.4927999973297119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,16384,0.6594240069389343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,8192,0.3909119963645935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,8192,0.39878401160240173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,7168,0.3537600040435791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,12288,0.5017600059509277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,7168,0.3463360071182251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,10240,0.42659199237823486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,8192,0.343392014503479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,6144,0.2980799973011017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,5120,0.2519040107727051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,7168,0.30505600571632385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,6144,0.3081600069999695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,5120,0.2632319927215576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,6144,0.2675839960575104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,4096,0.21174399554729462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,4096,0.20611199736595154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,5120,0.22604799270629883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,3584,0.18639999628067017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,4096,0.18569600582122803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,3584,0.1897599995136261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,3072,0.16038399934768677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,3584,0.1664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,65536,3.1539840698242188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,3072,0.16438399255275726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,2560,0.13840000331401825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,2560,0.1435839980840683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,3072,0.1464959979057312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,2048,0.11488000303506851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,2048,0.11823999881744385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,2560,0.12671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,65536,3.5007359981536865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,1536,0.09299200028181076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,1536,0.09443199634552002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,2048,0.10688000172376633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,1024,0.07094399631023407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,1024,0.06934399902820587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,1536,0.08550400286912918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,768,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,1024,0.06550399959087372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,768,0.05929600074887276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,512,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,512,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,768,0.05558399856090546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,256,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,512,0.04681599885225296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,256,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,128,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,128,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,64,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,128,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,5120,32,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,64,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,64,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,5120,32,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,32,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,256,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,16384,0.624671995639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,12288,0.4599039852619171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,12288,0.4697279930114746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,16384,0.6069120168685913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,65536,2.858720064163208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,5120,65536,2.937824010848999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,10240,0.38886401057243347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,12288,0.4054720103740692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,16384,0.5317760109901428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,10240,0.3988800048828125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,8192,0.31145599484443665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,8192,0.328031986951828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,7168,0.28467199206352234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,7168,0.27830401062965393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,10240,0.33926400542259216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,6144,0.2479040026664734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,8192,0.2779200077056885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,6144,0.24499200284481049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,7168,0.24687999486923218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,5120,0.20367999374866486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,4096,0.1666560024023056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,5120,0.21014399826526642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,6144,0.21510399878025055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,3584,0.15491199493408203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,5120,0.1828799992799759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,4096,0.1712000072002411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,3584,0.14959999918937683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,4096,0.15033599734306335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,3072,0.1329600065946579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,3072,0.13017599284648895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,3584,0.13523200154304504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,3072,0.1181119978427887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,2560,0.1151999980211258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,2560,0.11318399757146835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,2048,0.09379199892282486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,2560,0.1021760031580925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,2048,0.09491200000047684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,1536,0.07583999633789062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,1536,0.07785599678754807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,65536,2.363935947418213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,2048,0.08579199761152267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,1024,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,1024,0.059167999774217606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,1536,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,768,0.04835199937224388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,1024,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,768,0.04992000013589859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,768,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,512,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,512,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,512,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,256,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,256,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,256,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,128,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,128,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,128,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,64,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,64,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,64,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,4096,32,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,4096,32,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,32,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,12288,0.40940800309181213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,16384,0.5469440221786499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,16384,0.5423679947853088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,12288,0.4195519983768463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,4096,65536,2.3726720809936523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,10240,0.35580798983573914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,16384,0.5320960283279419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,8192,0.2845759987831116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,10240,0.3481599986553192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,12288,0.40198400616645813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,8192,0.2895039916038513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,10240,0.3392319977283478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,7168,0.2523840069770813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,8192,0.2773759961128235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,6144,0.21852800250053406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,6144,0.22627200186252594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,7168,0.25760000944137573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,5120,0.18911999464035034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,7168,0.2465600073337555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,5120,0.1927040070295334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,6144,0.21411199867725372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,4096,0.16060799360275269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,4096,0.1573439985513687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,65536,2.4543681144714355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,4096,0.14873600006103516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,3584,0.14403200149536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,3584,0.144896000623703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,65536,2.111072063446045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,3584,0.1340160071849823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,3072,0.12822400033473969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,3072,0.1260800063610077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,2560,0.11283200234174728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,5120,0.1815679967403412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,2560,0.11478400230407715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,2048,0.09379199892282486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,3072,0.11817599833011627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,2560,0.1021760031580925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,2048,0.09481599926948547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,2048,0.08611200004816055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,1536,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,1024,0.05951999872922897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,1536,0.0791039988398552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,1536,0.06940799951553345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,1024,0.05753599852323532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,1024,0.053119998425245285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,768,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,768,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,512,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,768,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,512,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,512,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,256,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,256,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,128,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,256,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,128,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,128,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,64,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,64,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,64,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3584,32,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3584,32,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,32,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3584,65536,2.3067519664764404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,16384,0.4559679925441742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,12288,0.3476479947566986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,16384,0.47200000286102295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,12288,0.35417601466178894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,65536,1.9883840084075928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,16384,0.39769598841667175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,12288,0.3036479949951172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,10240,0.2948479950428009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,10240,0.3023039996623993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,8192,0.2431039959192276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,8192,0.2385600060224533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,7168,0.21084800362586975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,10240,0.258432000875473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,7168,0.21558399498462677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,8192,0.20819200575351715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,6144,0.18316799402236938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,7168,0.18559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,6144,0.18956799805164337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,5120,0.15779200196266174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,5120,0.1613440066576004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,6144,0.1611199975013733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,4096,0.12691199779510498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,4096,0.13126400113105774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,5120,0.13897599279880524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,3584,0.11443199962377548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,3584,0.11734399944543839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,3072,0.10307200253009796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,4096,0.11433599889278412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,3584,0.10233599692583084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,65536,1.8293440341949463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,3072,0.10019200295209885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,2560,0.08697599917650223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,2560,0.08819200098514557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,2048,0.07414399832487106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,3072,0.09001599997282028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,2048,0.07372800260782242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,2560,0.07798399776220322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,2048,0.06659200042486191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,1536,0.05958399921655655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,1024,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,1536,0.06163199990987778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,1024,0.046592000871896744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,1536,0.053408000618219376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,768,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,1024,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,768,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,512,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,768,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,512,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,512,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,256,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,256,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,256,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,128,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,128,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,128,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,64,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,64,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,64,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,3072,32,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,3072,32,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,32,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,3072,65536,1.621216058731079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,12288,0.31331199407577515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,16384,0.40937599539756775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,12288,0.3097279965877533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,10240,0.26307201385498047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,65536,1.727936029434204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,16384,0.39948800206184387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,10240,0.2635520100593567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,16384,0.4094719886779785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,12288,0.3031359910964966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,8192,0.21676799654960632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,8192,0.21222400665283203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,7168,0.1884479969739914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,10240,0.25830399990081787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,7168,0.1897280067205429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,8192,0.2078399956226349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,6144,0.16790400445461273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,6144,0.16460800170898438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,5120,0.14182400703430176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,7168,0.18518400192260742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,5120,0.14425599575042725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,6144,0.1629759967327118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,5120,0.137472003698349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,4096,0.1189119964838028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,3584,0.11011199653148651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,3584,0.1106560006737709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,4096,0.1215360015630722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,65536,1.6450879573822021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,4096,0.11315199732780457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,3072,0.09683199971914291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,2560,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,3072,0.0984639972448349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,3584,0.10262399911880493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,2560,0.08671999722719193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,3072,0.09046400338411331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,2560,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,1536,0.06185600161552429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,2048,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,2048,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,1536,0.060416001826524734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,2048,0.06614399701356888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,1536,0.05331199988722801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,1024,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,1024,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,1024,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,768,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,768,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,512,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,768,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,512,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,512,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,256,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,256,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,256,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,128,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,128,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,128,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,64,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,64,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2560,32,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,64,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2560,32,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,32,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2560,65536,1.6557120084762573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,12288,0.2356799989938736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,16384,0.3065600097179413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,12288,0.2425599992275238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,16384,0.31753599643707275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,16384,0.26864001154899597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,12288,0.2043199986219406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,65536,1.2933119535446167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,10240,0.20508800446987152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,10240,0.19776000082492828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,8192,0.16223999857902527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,10240,0.17212800681591034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,8192,0.16592000424861908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,8192,0.14028799533843994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,7168,0.14688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,7168,0.1424960047006607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,6144,0.12521600723266602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,6144,0.12748800218105316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,7168,0.12617599964141846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,5120,0.10707200318574905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,5120,0.10976000130176544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,6144,0.11030399799346924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,5120,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,4096,0.088128000497818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,65536,1.3074560165405273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,4096,0.08975999802350998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,4096,0.07727999985218048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,3584,0.08028800040483475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,3584,0.08118399977684021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,3072,0.06976000219583511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,3584,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,2560,0.06143999844789505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,3072,0.07203199714422226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,3072,0.06172800064086914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,2560,0.0660799965262413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,2048,0.052319999784231186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,2048,0.053279999643564224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,1536,0.04310400038957596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,2048,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,2560,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,1536,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,1024,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,1024,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,768,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,1024,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,768,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,768,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,512,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,512,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,256,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,512,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,256,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,65536,1.2010560035705566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,1536,0.04416000097990036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,128,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,128,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,256,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,128,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,64,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,64,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,64,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,2048,32,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,2048,32,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,2048,32,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,12288,0.1855359971523285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,12288,0.18883199989795685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,16384,0.23968000710010529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,16384,0.24489599466323853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,16384,0.26655998826026917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,10240,0.20313599705696106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,12288,0.20399999618530273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,10240,0.15990400314331055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,65536,1.151487946510315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,65536,0.8714240193367004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,10240,0.1720000058412552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,7168,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,8192,0.1337279975414276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,8192,0.13468800485134125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,7168,0.11894399672746658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,8192,0.1401599943637848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,6144,0.1056319996714592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,7168,0.12454400211572647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,6144,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,5120,0.09238400310277939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,6144,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,5120,0.09548799693584442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,4096,0.07983999699354172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,5120,0.09305600076913834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,4096,0.07791999727487564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,3584,0.07289600372314453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,4096,0.07699199765920639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,3584,0.0745600014925003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,3072,0.06483200192451477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,3584,0.06998399645090103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,3072,0.06604799628257751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,2560,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,3072,0.06124800071120262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,2560,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,2048,0.04867200180888176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,2048,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,2560,0.05452800169587135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,1536,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,1536,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,2048,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,1536,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,1024,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,1024,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,768,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,1024,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,768,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,768,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,512,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,512,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,256,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,512,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,256,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,65536,1.0592639446258545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,256,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,128,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,128,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,64,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,128,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,64,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1536,32,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1536,32,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1536,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,12288,0.12383999675512314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,12288,0.12211199849843979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,16384,0.316895991563797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,16384,0.15875199437141418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,16384,0.13711999356746674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,10240,0.10476800054311752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,12288,0.10463999956846237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,10240,0.10611200332641602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,8192,0.08556800335645676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,8192,0.08675199747085571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,7168,0.07539200037717819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,8192,0.07334399968385696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,7168,0.07711999863386154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,7168,0.06560000032186508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,6144,0.06639999896287918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,10240,0.08988799899816513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,6144,0.06784000247716904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,65536,0.6136320233345032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,65536,0.6389439702033997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,6144,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,5120,0.05859199911355972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,5120,0.10822399705648422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,4096,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,4096,0.04848000034689903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,5120,0.05075199902057648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,3584,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,3584,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,4096,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,3584,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,65536,0.5163519978523254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,3072,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,3072,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,2560,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,2560,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,3072,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,2560,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,2048,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,2048,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,1536,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,2048,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,1536,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,768,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,1024,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,1536,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,1024,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,1024,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,768,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,256,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,768,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,512,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,256,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,512,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,512,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,256,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,128,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,128,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,64,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,1024,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,128,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,64,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,64,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,1024,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,1024,32,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,12288,0.11788800358772278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,12288,0.12006399780511856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,16384,0.15574400126934052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,16384,0.15436799824237823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,16384,0.13737599551677704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,12288,0.10441599786281586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,10240,0.1032319962978363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,10240,0.10400000214576721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,10240,0.08979199826717377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,8192,0.08617600053548813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,8192,0.08720000088214874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,65536,0.7082560062408447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,65536,0.6528639793395996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,8192,0.07331199944019318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,7168,0.07500799745321274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,7168,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,7168,0.06560000032186508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,6144,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,6144,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,5120,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,5120,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,6144,0.057920001447200775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,4096,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,5120,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,4096,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,3584,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,3584,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,4096,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,3584,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,65536,0.5613759756088257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,3072,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,3072,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,2560,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,3072,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,2560,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,2048,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,2560,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,2048,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,2048,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,1536,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,1536,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,1536,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,1024,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,1024,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,1024,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,768,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,768,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,512,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,256,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,512,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,768,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,512,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,256,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,128,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,64,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,768,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,768,32,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,768,256,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,12288,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,12288,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,16384,0.09247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,16384,0.09292799979448318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,16384,0.09795200079679489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,10240,0.0695360004901886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,12288,0.07539200037717819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,10240,0.06460800021886826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,10240,0.06480000168085098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,8192,0.05081599950790405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,8192,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,65536,0.3856320083141327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,8192,0.05251200124621391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,7168,0.047359999269247055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,7168,0.04668800160288811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,6144,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,7168,0.04806400090456009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,6144,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,65536,0.3765760064125061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,6144,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,5120,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,5120,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,4096,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,5120,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,4096,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,4096,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,65536,0.3686079978942871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,3584,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,3584,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,3584,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,3072,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,3072,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,3072,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,2560,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,2560,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,2560,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,2048,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,2048,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,1536,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,1536,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,2048,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,1536,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,1024,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,1024,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,1024,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,768,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,512,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,512,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,256,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,64,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,64,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,512,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,512,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,512,32,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,12288,0.0533440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,16384,0.06870400160551071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,12288,0.0541439987719059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,16384,0.07023999840021133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,16384,0.057312000542879105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,12288,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,10240,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,10240,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,10240,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,8192,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,8192,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,7168,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,7168,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,6144,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,65536,0.22412799298763275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,65536,0.23926399648189545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,6144,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,6144,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,4096,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,5120,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,65536,0.2040960043668747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,4096,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,3584,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,3584,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,3072,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,3072,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,2560,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,2560,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,2048,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,2048,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,1536,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,1536,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,5120,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,1024,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,1024,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,256,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,256,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,256,32,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,12288,0.050944000482559204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,12288,0.05020799860358238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,16384,0.10815999656915665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,16384,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,16384,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,12288,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,10240,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,10240,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,10240,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,8192,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,8192,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,8192,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,65536,0.46486398577690125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,7168,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,7168,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,6144,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,7168,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,65536,0.2126079946756363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,6144,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,6144,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,5120,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,4096,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,5120,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,4096,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,65536,0.1327040046453476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,5120,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,3584,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,4096,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,3072,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,2560,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,3584,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,3072,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,2560,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,2048,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,2048,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,1536,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,768,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,128,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,128,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,128,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,12288,0.051392000168561935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,12288,0.050335999578237534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,16384,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,16384,0.06278400123119354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,16384,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,12288,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,10240,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,10240,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,8192,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,10240,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,8192,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,8192,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,7168,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,7168,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,7168,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,65536,0.20588800311088562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,6144,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,6144,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,65536,0.13414399325847626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,5120,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,6144,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,5120,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,5120,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,4096,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,4096,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,3584,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,65536,0.21196800470352173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,3072,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,2560,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,2560,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,2048,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,1536,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,64,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,64,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,64,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,12288,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,12288,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,16384,0.06185600161552429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,16384,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,16384,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,12288,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,10240,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,10240,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,8192,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,10240,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,8192,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,7168,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,8192,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,7168,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,6144,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,7168,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,65536,0.20652799308300018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,65536,0.2446720004081726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,6144,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,6144,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,5120,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,5120,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,5120,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,65536,0.1297599971294403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,4096,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,3584,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,3072,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,3072,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,2048,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,2560,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4096,32,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4096,32,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4096,32,32,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,10240,3.1735999584198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,10240,2.9728000164031982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,12288,3.7629120349884033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,12288,3.487776041030884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,16384,4.857344150543213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,16384,4.651872158050537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,8192,2.5165441036224365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,8192,2.392767906188965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,12288,3.2047040462493896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,6144,1.7980480194091797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,7168,2.080319881439209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,7168,2.2688000202178955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,5120,1.7386879920959473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,8192,2.164896011352539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,6144,1.9017280340194702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,16384,4.186367988586426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,10240,2.695391893386841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,5120,1.6191680431365967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,4096,1.2352319955825806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,4096,1.3049919605255127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,7168,1.9155199527740479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,3584,1.1015679836273193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,6144,1.6763520240783691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,3072,0.9828159809112549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,5120,1.421247959136963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,3584,1.1492160558700562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,4096,1.1654399633407593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,3072,0.961184024810791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,2560,0.8214719891548157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,2560,0.8434879779815674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,1536,0.5327039957046509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,2048,0.6716480255126953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,1024,0.39417600631713867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,2048,0.692255973815918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,3072,0.901856005191803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,1536,0.5421760082244873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,3584,1.0351680517196655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,768,0.321152001619339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,2560,0.7767999768257141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,1024,0.403328001499176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,768,0.32841598987579346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,2048,0.6472960114479065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,512,0.24739199876785278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,1536,0.5188480019569397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,512,0.25491198897361755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,1024,0.38470399379730225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,256,0.13715200126171112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,768,0.3197759985923767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,128,0.11699199676513672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,256,0.13052800297737122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,128,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,64,0.11161600053310394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,512,0.2555519938468933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,64,0.11299200356006622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,256,0.1947840005159378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,65536,32,0.11664000153541565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,65536,32,0.11193600296974182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,128,0.15638400614261627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,64,0.13705599308013916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,65536,32,0.13782399892807007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,16384,1.2323839664459229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,12288,0.9382399916648865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,12288,0.911296010017395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,16384,1.195904016494751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,10240,0.7596160173416138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,10240,0.7991039752960205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,12288,0.8048639893531799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,16384,1.0584319829940796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,8192,0.634335994720459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,8192,0.6148480176925659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,10240,0.6730560064315796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,7168,0.5423359870910645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,7168,0.5568000078201294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,8192,0.5495039820671082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,6144,0.48425599932670593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,6144,0.4679679870605469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,5120,0.3989120125770569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,7168,0.48339200019836426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,6144,0.4215039908885956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,5120,0.4126720130443573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,4096,0.32758399844169617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,4096,0.3352639973163605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,5120,0.3567039966583252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,3584,0.2898559868335724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,4096,0.2927359938621521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,3584,0.2999039888381958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,3072,0.2518720030784607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,3072,0.2600640058517456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,2560,0.21481600403785706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,3584,0.2632960081100464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,65536,4.8275837898254395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,2048,0.17776000499725342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,2560,0.22271999716758728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,3072,0.23119999468326569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,2048,0.18345600366592407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,1536,0.1454399973154068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,2560,0.19804799556732178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,1536,0.14256000518798828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,2048,0.16579200327396393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,1024,0.10556799918413162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,1536,0.1348479986190796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,1024,0.1090560033917427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,1024,0.10316800326108932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,768,0.08905600011348724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,768,0.08710400015115738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,768,0.08505599945783615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,512,0.06835199892520905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,65536,4.723008155822754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,512,0.07081600278615952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,256,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,256,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,512,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,128,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,128,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,256,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,64,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,128,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,64,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,16384,32,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,64,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,16384,32,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,32,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,12288,0.6826559901237488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,16384,0.9065600037574768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,12288,0.6995840072631836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,10240,0.5734720230102539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,65536,3.617664098739624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,16384,0.9196159839630127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,65536,3.518079996109009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,10240,0.5837759971618652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,12288,0.6055359840393066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,8192,0.47910401225090027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,16384,0.788703978061676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,8192,0.46355199813842773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,7168,0.41359999775886536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,6144,0.36236798763275146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,10240,0.5104640126228333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,7168,0.42262399196624756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,6144,0.35625600814819336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,5120,0.3081600069999695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,8192,0.41046398878097534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,7168,0.36447998881340027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,5120,0.30239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,4096,0.24502399563789368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,6144,0.31996798515319824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,4096,0.2537280023097992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,5120,0.2696320116519928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,3584,0.22355200350284576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,16384,65536,4.079455852508545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,3584,0.21907199919223785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,3072,0.19676800072193146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,4096,0.22067199647426605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,3072,0.192671999335289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,2560,0.164000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,2560,0.1695680022239685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,3584,0.1998080015182495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,3072,0.17440000176429749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,2048,0.13657599687576294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,1536,0.10851199924945831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,2048,0.13964800536632538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,2560,0.15027199685573578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,2048,0.12646399438381195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,1024,0.08460800349712372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,1536,0.11219199746847153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,1024,0.08214399963617325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,1536,0.10268799960613251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,1024,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,512,0.05423999950289726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,512,0.05603199824690819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,768,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,768,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,256,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,512,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,256,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,128,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,256,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,128,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,64,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,64,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,128,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,64,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,32,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,12288,32,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,12288,768,0.06784000247716904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,32,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,16384,0.7726079821586609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,12288,0.5736960172653198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,12288,0.5875200033187866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,16384,0.7565439939498901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,16384,0.6581760048866272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,10240,0.4965760111808777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,10240,0.4822399914264679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,12288,0.5015040040016174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,8192,0.4002240002155304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,8192,0.39344000816345215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,10240,0.4228160083293915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,12288,65536,3.06112003326416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,7168,0.35254400968551636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,7168,0.34857600927352905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,8192,0.34275200963020325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,6144,0.29868799448013306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,6144,0.30979201197624207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,5120,0.25142401456832886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,6144,0.26556798815727234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,7168,0.30483201146125793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,5120,0.25811201333999634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,4096,0.21209600567817688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,65536,3.0781118869781494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,4096,0.2059839963912964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,5120,0.22543999552726746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,4096,0.1855040043592453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,3584,0.18348799645900726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,3584,0.18825599551200867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,3072,0.16198399662971497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,3584,0.16713599860668182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,3072,0.16816000640392303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,2560,0.14195199310779572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,2560,0.13843199610710144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,65536,3.0312321186065674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,3072,0.146464005112648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,2048,0.11500799655914307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,2560,0.12700800597667694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,2048,0.11884800344705582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,1536,0.09663999825716019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,1536,0.09216000139713287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,2048,0.1061440035700798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,768,0.05920000001788139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,1024,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,1024,0.07155200093984604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,1536,0.0867839977145195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,1024,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,768,0.06060799956321716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,512,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,512,0.049056001007556915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,768,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,256,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,256,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,512,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,256,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,128,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,128,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,64,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,64,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,128,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,64,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,10240,32,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,10240,32,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,32,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,16384,0.622111976146698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,12288,0.46054399013519287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,16384,0.608959972858429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,12288,0.4676800072193146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,10240,65536,2.5523200035095215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,10240,0.3898879885673523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,16384,0.5278400182723999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,10240,0.39635199308395386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,8192,0.3198400139808655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,12288,0.40217599272727966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,8192,0.3118079900741577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,7168,0.27587199211120605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,10240,0.33980798721313477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,7168,0.2868160009384155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,8192,0.2751680016517639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,7168,0.24476799368858337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,6144,0.2391359955072403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,5120,0.20243200659751892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,6144,0.21347199380397797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,4096,0.16575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,5120,0.21036800742149353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,6144,0.24771200120449066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,5120,0.18137599527835846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,65536,2.482208013534546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,3584,0.14956800639629364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,4096,0.17132799327373505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,3584,0.15251199901103973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,4096,0.14876799285411835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,3072,0.13097600638866425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,3072,0.13331200182437897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,3584,0.13433599472045898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,2560,0.11494400352239609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,65536,2.3471360206604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,3072,0.11798399686813354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,2560,0.11267200112342834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,2048,0.09299200028181076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,2560,0.10172799974679947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,1536,0.07625599950551987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,2048,0.09532800316810608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,1536,0.07843200117349625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,1024,0.05737600103020668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,2048,0.08550400286912918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,1024,0.05846399813890457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,1536,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,768,0.049247998744249344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,1024,0.05407999828457832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,768,0.050655998289585114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,512,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,512,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,768,0.046560000628232956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,256,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,512,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,256,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,128,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,128,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,256,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,128,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,64,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,64,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,64,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,8192,32,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,8192,32,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,32,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,16384,0.5703039765357971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,12288,0.4050559997558594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,8192,65536,2.060544013977051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,16384,0.5286080241203308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,12288,0.4137600064277649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,65536,2.0779519081115723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,10240,0.3537600040435791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,16384,0.5320000052452087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,8192,0.2800639867782593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,8192,0.2884159982204437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,10240,0.3457280099391937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,12288,0.40220800042152405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,10240,0.33904001116752625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,7168,0.2575039863586426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,7168,0.2502079904079437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,8192,0.2773439884185791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,6144,0.22035199403762817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,7168,0.24473600089550018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,6144,0.22575999796390533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,5120,0.18985599279403687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,5120,0.19280000030994415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,4096,0.16179199516773224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,4096,0.15673600137233734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,6144,0.21305599808692932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,5120,0.18089599907398224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,3584,0.14348800480365753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,3072,0.12620800733566284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,3584,0.14127999544143677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,4096,0.14905600249767303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,3072,0.1276160031557083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,3584,0.13344000279903412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,2560,0.11215999722480774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,3072,0.11711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,2560,0.11238399893045425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,2048,0.09372799843549728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,2560,0.10159999877214432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,2048,0.09455999732017517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,1536,0.07532799988985062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,2048,0.08540800213813782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,1536,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,1024,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,65536,2.062880039215088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,1536,0.06960000097751617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,768,0.04867200180888176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,1024,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,1024,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,768,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,512,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,512,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,768,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,256,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,512,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,256,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,128,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,256,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,128,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,64,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,64,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,128,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,7168,32,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,64,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,32,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,16384,0.4575999975204468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,7168,32,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,12288,0.3474240005016327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,12288,0.35446399450302124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,16384,0.46851199865341187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,65536,1.8077119588851929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,7168,65536,2.059999942779541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,16384,0.397599995136261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,8192,0.23600000143051147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,10240,0.29183998703956604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,10240,0.3017919957637787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,12288,0.3035520017147064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,7168,0.20979200303554535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,8192,0.2431039959192276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,10240,0.2582719922065735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,7168,0.21728000044822693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,8192,0.21036800742149353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,5120,0.15609599649906158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,6144,0.1823039948940277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,6144,0.19046400487422943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,7168,0.18460799753665924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,5120,0.160863995552063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,4096,0.12812800705432892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,4096,0.1305920034646988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,6144,0.16195200383663177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,3584,0.114656001329422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,3584,0.11708799749612808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,5120,0.1375039964914322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,4096,0.11398400366306305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,3072,0.10025600343942642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,3072,0.10374400019645691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,3584,0.10227199643850327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,2560,0.08620800077915192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,2560,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,3072,0.0902400016784668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,2048,0.0721919983625412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,65536,1.7773760557174683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,2560,0.07747200131416321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,2048,0.0740479975938797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,1536,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,1536,0.061184000223875046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,2048,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,1024,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,1024,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,1024,0.0416640006005764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,1536,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,768,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,768,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,512,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,768,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,512,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,256,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,512,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,256,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,256,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,128,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,128,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,128,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,64,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,64,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,6144,32,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,64,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,6144,32,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,32,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,16384,0.38655999302864075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,12288,0.2975679934024811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,6144,65536,1.534208059310913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,16384,0.39529600739479065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,12288,0.3031040132045746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,10240,0.2534080147743225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,16384,0.3972479999065399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,65536,1.512511968612671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,8192,0.20627200603485107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,10240,0.25900799036026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,12288,0.302592009305954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,8192,0.21187199652194977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,10240,0.2555840015411377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,7168,0.19065600633621216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,7168,0.18544000387191772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,8192,0.20796799659729004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,6144,0.16339200735092163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,7168,0.1860480010509491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,5120,0.14588800072669983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,6144,0.16758400201797485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,5120,0.141184002161026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,4096,0.12252800166606903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,5120,0.13769599795341492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,6144,0.16150400042533875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,4096,0.1196800023317337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,3584,0.10751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,3584,0.11078400164842606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,4096,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,3072,0.09775999933481216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,3072,0.09801600128412247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,3584,0.1016319990158081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,65536,1.5066879987716675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,2560,0.08671999722719193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,3072,0.08931200206279755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,2048,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,2048,0.0737600028514862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,2560,0.07731200009584427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,1536,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,2048,0.06521599739789963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,1536,0.060575999319553375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,2560,0.08742400258779526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,1024,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,768,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,1024,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,1536,0.053408000618219376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,1024,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,768,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,768,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,512,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,256,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,512,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,512,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,256,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,256,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,128,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,128,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,64,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,128,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,64,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,64,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,5120,32,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,5120,32,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,32,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,12288,0.23289600014686584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,5120,65536,1.5335359573364258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,16384,0.31696000695228577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,12288,0.23942400515079498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,16384,0.30793601274490356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,10240,0.20387199521064758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,10240,0.19840000569820404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,16384,0.2688319981098175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,12288,0.20479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,8192,0.1653439998626709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,65536,1.1876800060272217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,8192,0.15993599593639374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,10240,0.17235200107097626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,7168,0.14451199769973755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,8192,0.14211200177669525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,7168,0.1467839926481247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,6144,0.12352000176906586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,7168,0.12572799623012543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,6144,0.12716799974441528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,5120,0.10896000266075134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,6144,0.11072000116109848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,5120,0.10787200182676315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,4096,0.08806400001049042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,4096,0.08982399851083755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,5120,0.09404800087213516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,4096,0.07744000107049942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,65536,1.213919997215271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,3584,0.07843200117349625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,3584,0.0801599994301796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,3072,0.0711359977722168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,3584,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,3072,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,2560,0.06080000102519989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,3072,0.06201599910855293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,2560,0.061792001128196716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,2048,0.05177599936723709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,2048,0.05257600173354149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,2560,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,2048,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,1536,0.042688000947237015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,1536,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,1024,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,1536,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,1024,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,768,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,1024,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,768,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,512,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,768,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,512,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,256,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,512,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,256,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,128,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,256,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,128,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,128,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,64,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,64,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,4096,32,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,64,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,4096,32,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,32,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,4096,65536,1.0323519706726074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,16384,0.312608003616333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,16384,0.3067519962787628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,12288,0.23712000250816345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,16384,0.26870399713516235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,65536,1.209439992904663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,10240,0.200095996260643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,10240,0.20278400182724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,12288,0.20403200387954712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,65536,1.1881599426269531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,12288,0.23919999599456787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,8192,0.16419200599193573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,7168,0.14707200229167938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,8192,0.16035200655460358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,7168,0.14348800480365753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,6144,0.12352000176906586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,10240,0.1737920045852661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,8192,0.14163200557231903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,7168,0.12601600587368011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,6144,0.1279039978981018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,5120,0.10921599715948105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,5120,0.10697600245475769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,6144,0.10927999764680862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,4096,0.08816000074148178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,5120,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,4096,0.09094399958848953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,3584,0.07846400141716003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,4096,0.07763200253248215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,3584,0.08028800040483475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,3072,0.06988800317049026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,3584,0.06988800317049026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,3072,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,2560,0.06201599910855293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,2560,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,3072,0.061503998935222626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,2048,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,2048,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,2560,0.053568001836538315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,1536,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,1536,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,2048,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,1536,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,1024,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,1024,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,1024,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,768,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,768,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,768,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,512,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,512,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,512,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,256,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,128,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,128,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,256,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,256,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,64,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,128,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,64,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3584,32,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,64,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,65536,1.0238399505615234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3584,32,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3584,32,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,12288,0.18880000710487366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,12288,0.18464000523090363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,10240,0.16124799847602844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,16384,0.24505600333213806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,16384,0.23971199989318848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,12288,0.20377600193023682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,16384,0.26655998826026917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,8192,0.13206399977207184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,65536,1.1548160314559937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,65536,1.2814719676971436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,10240,0.16435199975967407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,10240,0.17334400117397308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,7168,0.12371200323104858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,8192,0.1350719928741455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,7168,0.12179200351238251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,8192,0.14131200313568115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,6144,0.10678400099277496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,7168,0.1252799928188324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,5120,0.09756799787282944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,6144,0.1090560033917427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,5120,0.09356799721717834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,6144,0.10915199667215347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,4096,0.0793600007891655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,5120,0.09296000003814697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,4096,0.07971200346946716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,3584,0.07254400104284286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,3584,0.07302399724721909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,4096,0.07766400277614594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,3072,0.06592000275850296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,3072,0.06707199662923813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,3584,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,2560,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,3072,0.06115199998021126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,2560,0.05926400050520897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,2048,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,2048,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,2560,0.05347200110554695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,1536,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,2048,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,1536,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,1024,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,1024,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,1536,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,1024,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,768,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,768,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,512,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,512,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,256,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,512,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,256,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,256,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,128,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,128,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,128,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,65536,1.0249279737472534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,64,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,64,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,32,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,3072,32,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,3072,768,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,3072,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,12288,0.15804800391197205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,16384,0.20217600464820862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,12288,0.162432000041008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,16384,0.20745599269866943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,10240,0.13897599279880524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,16384,0.2707200050354004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,10240,0.1401280015707016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,12288,0.19228799641132355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,8192,0.11417599767446518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,65536,0.7591680288314819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,8192,0.11657600104808807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,7168,0.10390400141477585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,10240,0.16288000345230103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,8192,0.12694400548934937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,7168,0.1053759977221489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,6144,0.09136000275611877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,6144,0.09136000275611877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,7168,0.11075200140476227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,6144,0.09888000041246414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,5120,0.08048000186681747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,4096,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,5120,0.0830719992518425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,5120,0.0828159973025322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,4096,0.06886400282382965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,65536,0.7390080094337463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,3584,0.062272001057863235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,3584,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,4096,0.06656000018119812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,3584,0.06032000109553337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,3072,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,2560,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,3072,0.05920000001788139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,3072,0.052799999713897705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,2560,0.05225599929690361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,2560,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,2048,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,2048,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,1536,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,2048,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,1536,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,1024,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,1536,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,1024,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,1024,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,768,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,768,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,65536,1.031999945640564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,768,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,256,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,512,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,512,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,256,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,512,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,256,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,128,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,128,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,64,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,64,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,128,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2560,32,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2560,32,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2560,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,12288,0.12211199849843979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,16384,0.1586879938840866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,12288,0.12387199699878693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,16384,0.16364799439907074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,16384,0.138047993183136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,10240,0.10400000214576721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,12288,0.10438399761915207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,8192,0.08499199897050858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,10240,0.10652799904346466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,10240,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,65536,0.5989760160446167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,8192,0.0870399996638298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,7168,0.07833600044250488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,7168,0.0764480009675026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,8192,0.0729919970035553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,6144,0.08463999629020691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,6144,0.06707199662923813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,5120,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,6144,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,5120,0.06038400158286095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,65536,0.6082879900932312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,5120,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,4096,0.04848000034689903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,4096,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,3584,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,4096,0.05027199909090996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,3072,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,3584,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,3584,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,7168,0.0658240020275116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,3072,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,3072,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,2560,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,2560,0.03596799820661545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,2560,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,2048,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,65536,0.5148159861564636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,2048,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,1536,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,2048,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,1536,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,1024,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,1024,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,768,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,1536,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,1024,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,768,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,768,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,512,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,512,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,256,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,512,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,256,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,128,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,128,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,128,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,32,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,2048,64,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,64,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,2048,32,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,2048,32,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,16384,0.1562879979610443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,16384,0.15865600109100342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,12288,0.1186240017414093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,12288,0.12025599926710129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,16384,0.1372160017490387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,10240,0.104032002389431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,12288,0.10463999956846237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,10240,0.1042879968881607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,10240,0.08928000181913376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,8192,0.08483199775218964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,65536,0.5595200061798096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,8192,0.08767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,7168,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,7168,0.07772800326347351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,8192,0.07321599870920181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,7168,0.06563200056552887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,6144,0.06800000369548798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,6144,0.06755200028419495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,5120,0.058368001133203506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,6144,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,5120,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,65536,0.5672000050544739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,4096,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,5120,0.048928000032901764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,4096,0.04927999898791313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,4096,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,3584,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,3584,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,3584,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,3072,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,65536,0.5190079808235168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,3072,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,3072,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,2560,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,2560,0.03545600175857544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,2560,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,2048,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,2048,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,2048,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,1024,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,1536,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,1536,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,1536,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,1024,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,1024,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,768,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,768,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,512,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,512,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,512,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,768,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,256,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,256,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,128,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,128,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,64,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1536,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1536,32,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,16384,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,12288,0.07772800326347351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,16384,0.10496000200510025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1536,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,16384,0.0974079966545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,12288,0.08144000172615051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,12288,0.07740800082683563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,10240,0.06918399780988693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,10240,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,8192,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,10240,0.0650240033864975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,65536,0.385343998670578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,8192,0.057312000542879105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,7168,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,8192,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,7168,0.050655998289585114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,7168,0.04758400097489357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,6144,0.05321599915623665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,6144,0.04553600028157234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,65536,0.3675839900970459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,6144,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,5120,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,5120,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,4096,0.04169600084424019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,5120,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,4096,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,65536,0.3872320055961609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,4096,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,3584,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,3584,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,3584,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,3072,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,3072,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,3072,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,2560,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,2560,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,2560,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,2048,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,2048,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,1536,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,2048,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,1536,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,1024,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,1536,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,1024,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,768,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,768,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,512,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,512,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,64,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,1024,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,1024,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,1024,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,12288,0.06400000303983688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,12288,0.06483200192451477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,16384,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,16384,0.07737600058317184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,16384,0.08185599744319916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,12288,0.0613120011985302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,10240,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,10240,0.058240000158548355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,8192,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,10240,0.053568001836538315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,8192,0.05663999915122986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,7168,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,8192,0.04358400031924248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,7168,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,6144,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,6144,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,7168,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,65536,0.24249599874019623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,65536,0.2399359941482544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,6144,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,5120,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,5120,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,4096,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,5120,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,4096,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,4096,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,3584,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,3584,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,3584,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,3072,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,65536,0.3357119858264923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,3072,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,2560,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,2048,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,2560,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,2048,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,2048,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,2560,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,1536,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,1536,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,1024,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,1536,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,1024,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,768,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,1024,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,3072,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,256,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,64,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,768,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,768,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,768,32,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,16384,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,12288,0.05110400170087814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,12288,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,16384,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,16384,0.05660799890756607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,12288,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,10240,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,10240,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,8192,0.048287998884916306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,10240,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,8192,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,7168,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,8192,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,7168,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,7168,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,65536,0.17983999848365784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,6144,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,65536,0.20192000269889832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,6144,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,6144,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,5120,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,5120,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,4096,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,65536,0.2030079960823059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,4096,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,3584,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,3584,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,3072,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,3072,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,2560,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,2560,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,2048,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,1536,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,2048,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,1536,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,1536,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,1024,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,768,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,512,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,512,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,64,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,512,32,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,12288,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,16384,0.04758400097489357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,16384,0.04745600000023842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,16384,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,12288,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,10240,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,10240,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,12288,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,10240,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,8192,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,65536,0.1656319946050644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,8192,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,8192,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,7168,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,7168,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,7168,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,65536,0.12947200238704681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,6144,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,6144,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,5120,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,6144,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,65536,0.1202239990234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,5120,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,4096,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,5120,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,4096,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,4096,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,3584,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,3584,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,3072,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,2560,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,3072,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,2560,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,2048,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,2048,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,1536,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,1536,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,256,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,256,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,256,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,12288,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,12288,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,16384,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,16384,0.04662400111556053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,16384,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,12288,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,10240,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,8192,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,10240,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,10240,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,8192,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,65536,0.11878400295972824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,8192,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,7168,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,7168,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,7168,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,5120,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,65536,0.12326399981975555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,6144,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,6144,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,6144,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,65536,0.11814399808645248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,4096,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,4096,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,3584,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,2560,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,3072,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,2560,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,2048,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,1536,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,1024,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,128,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,128,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,128,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,12288,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,16384,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,12288,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,16384,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,16384,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,12288,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,10240,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,10240,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,10240,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,8192,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,8192,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,8192,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,7168,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,6144,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,7168,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,65536,0.12425599992275238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,65536,0.13488000631332397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,6144,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,65536,0.11859200149774551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,5120,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,6144,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,5120,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,4096,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,4096,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,3072,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,4096,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,3072,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,2048,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,2560,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,2048,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,1536,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,512,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,64,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,64,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,64,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,64,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,12288,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,12288,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,16384,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,16384,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,16384,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,12288,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,10240,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,10240,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,8192,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,10240,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,7168,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,8192,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,65536,0.11971200257539749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,65536,0.1281919926404953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,7168,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,7168,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,6144,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,8192,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,5120,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,5120,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,65536,0.11532799899578094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,5120,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,3584,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,4096,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,3584,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,3072,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,2560,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,2048,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,512,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2048,32,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2048,32,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2048,32,32,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,12288,1.8500800132751465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,16384,2.4282240867614746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,10240,1.5196479558944702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,16384,2.4006080627441406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,8192,1.2584960460662842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,10240,1.5547840595245361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,8192,1.2181119918823242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,7168,1.0982400178909302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,12288,1.7998080253601074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,7168,1.0909440517425537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,6144,0.9545599818229675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,10240,1.3504639863967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,8192,1.0936000347137451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,12288,1.5936319828033447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,6144,0.9363200068473816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,5120,0.7823039889335632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,5120,0.8114879727363586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,7168,0.9685440063476562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,4096,0.6423680186271667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,6144,0.8412479758262634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,16384,2.09552001953125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,4096,0.6637120246887207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,3072,0.5060799717903137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,5120,0.7141759991645813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,2560,0.4192639887332916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,4096,0.5824000239372253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,3584,0.5838080048561096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,3584,0.5691840052604675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,3072,0.49433600902557373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,2048,0.3542720079421997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,1536,0.2744959890842438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,3072,0.4586560130119324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,3584,0.525983989238739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,2560,0.4336319863796234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,2048,0.347680002450943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,1024,0.20524799823760986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,1536,0.2818560004234314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,768,0.1648000031709671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,2560,0.39347198605537415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,1024,0.20102399587631226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,2048,0.32873600721359253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,512,0.11683200299739838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,768,0.16835199296474457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,1536,0.2686080038547516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,1024,0.20233599841594696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,512,0.11760000139474869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,256,0.06963200122117996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,768,0.1709440052509308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,256,0.07612799853086472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,128,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,512,0.13900800049304962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,128,0.062144000083208084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,128,0.08463999629020691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,64,0.06201599910855293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,65536,32,0.06492800265550613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,64,0.06015999987721443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,65536,32,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,64,0.07494399696588516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,32,0.0753600001335144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,65536,256,0.10601600259542465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,12288,0.46489599347114563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,16384,0.620032012462616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,16384,0.6025919914245605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,12288,0.4697920083999634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,65536,2.3932158946990967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,10240,0.38489601016044617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,16384,0.5288000106811523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,65536,2.36678409576416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,8192,0.31727999448776245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,10240,0.3988800048828125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,8192,0.314303994178772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,12288,0.4063040018081665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,10240,0.33904001116752625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,7168,0.2772800028324127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,7168,0.28336000442504883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,6144,0.24102400243282318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,6144,0.24755200743675232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,8192,0.276095986366272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,5120,0.2070080041885376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,7168,0.24527999758720398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,4096,0.16969600319862366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,5120,0.20476800203323364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,4096,0.16595199704170227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,3584,0.14867199957370758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,5120,0.1826239973306656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,6144,0.21276800334453583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,3584,0.15270400047302246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,4096,0.14956800639629364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,3072,0.13315199315547943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,3072,0.13023999333381653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,2560,0.11257600039243698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,3584,0.13449600338935852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,2560,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,2048,0.09440000355243683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,2048,0.10259199887514114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,3072,0.11897599697113037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,2560,0.10236799716949463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,1536,0.07548800110816956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,2048,0.08665599673986435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,1024,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,1024,0.0560000017285347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,1536,0.07395199686288834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,1536,0.07068800181150436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,1024,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,768,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,768,0.043455999344587326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,512,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,512,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,768,0.047200001776218414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,256,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,512,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,256,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,256,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,128,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,128,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,64,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,128,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,64,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,64,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,16384,32,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,16384,32,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,32,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,12288,0.3447040021419525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,16384,0.4668799936771393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,16384,0.45772799849510193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,12288,0.35523200035095215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,65536,1.801632046699524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,10240,0.29123198986053467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,16384,0.40275201201438904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,12288,0.30377599596977234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,10240,0.29769599437713623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,8192,0.23792000114917755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,16384,65536,2.0427520275115967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,8192,0.24297599494457245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,7168,0.2086080014705658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,10240,0.25814399123191833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,8192,0.2091519981622696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,7168,0.22313599288463593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,6144,0.18454399704933167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,6144,0.1887039989233017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,7168,0.18636800348758698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,5120,0.1554879993200302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,5120,0.15782399475574493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,6144,0.16358399391174316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,5120,0.13849599659442902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,4096,0.14707200229167938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,4096,0.12758399546146393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,4096,0.11407999694347382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,3584,0.11747200042009354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,3072,0.1000640019774437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,3584,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,3072,0.1037760004401207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,2560,0.08604799956083298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,65536,1.7801920175552368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,2048,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,3072,0.09040000289678574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,3584,0.11376000195741653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,2560,0.08867199718952179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,2560,0.0788159966468811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,2048,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,1536,0.05913599953055382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,1536,0.05920000001788139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,1024,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,2048,0.06588800251483917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,1536,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,1024,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,768,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,768,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,1024,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,512,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,512,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,768,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,512,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,256,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,256,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,256,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,128,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,128,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,64,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,128,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,64,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,64,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,12288,32,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,12288,32,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,32,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,12288,65536,1.5337599515914917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,16384,0.3946239948272705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,12288,0.3020159900188446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,12288,0.29628801345825195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,16384,0.38515201210975647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,10240,0.26047998666763306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,10240,0.25174400210380554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,16384,0.39769598841667175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,12288,0.3059839904308319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,65536,1.4736640453338623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,8192,0.20531199872493744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,7168,0.18380799889564514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,10240,0.2579520046710968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,8192,0.21148799359798431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,8192,0.20771199464797974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,7168,0.18943999707698822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,6144,0.1674879938364029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,6144,0.16470399498939514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,7168,0.18508799374103546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,5120,0.141184002161026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,6144,0.1621440052986145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,5120,0.14368000626564026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,4096,0.11155200004577637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,4096,0.11878400295972824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,5120,0.13900800049304962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,3584,0.10822399705648422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,4096,0.11356800049543381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,3584,0.09964799880981445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,3072,0.08710400015115738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,3584,0.10230399668216705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,65536,1.6012799739837646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,3072,0.08659200370311737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,2560,0.08524800091981888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,3072,0.09040000289678574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,2560,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,2048,0.0652799978852272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,2048,0.06335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,2560,0.07785599678754807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,1536,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,2048,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,1024,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,1536,0.049984000623226166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,1024,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,1536,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,1024,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,768,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,768,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,512,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,768,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,512,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,512,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,256,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,256,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,256,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,128,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,128,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,128,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,64,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,64,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,10240,32,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,64,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,10240,32,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,32,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,12288,0.23308800160884857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,10240,65536,1.5326080322265625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,12288,0.2378239929676056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,16384,0.3052160143852234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,16384,0.3136320114135742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,16384,0.2690240144729614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,10240,0.19756799936294556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,12288,0.20688000321388245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,8192,0.15993599593639374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,65536,1.210528016090393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,10240,0.2011519968509674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,8192,0.16355200111865997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,65536,1.1926079988479614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,7168,0.14531199634075165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,7168,0.14188799262046814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,10240,0.1733119934797287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,6144,0.12521600723266602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,6144,0.1268160045146942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,8192,0.14131200313568115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,5120,0.10585600137710571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,7168,0.1252480000257492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,5120,0.10899200290441513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,4096,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,4096,0.08748800307512283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,6144,0.11068800091743469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,3584,0.07903999835252762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,5120,0.09379199892282486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,3584,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,4096,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,3072,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,3072,0.07180800288915634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,3584,0.07068800181150436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,2560,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,3072,0.06195199862122536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,2560,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,2048,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,2560,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,2048,0.05190400034189224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,2048,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,1536,0.04169600084424019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,1536,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,1024,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,1536,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,1024,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,1024,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,768,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,768,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,768,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,512,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,512,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,512,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,256,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,256,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,256,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,128,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,128,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,128,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,64,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,64,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,64,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,8192,32,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,8192,32,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,32,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,8192,65536,1.0243840217590332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,12288,0.23292799293994904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,12288,0.23795199394226074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,16384,0.31219199299812317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,10240,0.19750399887561798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,16384,0.3078719973564148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,16384,0.26892799139022827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,12288,0.20422400534152985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,65536,1.2074240446090698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,8192,0.16131199896335602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,10240,0.20112000405788422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,8192,0.16435199975967407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,7168,0.1449279934167862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,10240,0.17388799786567688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,7168,0.14166399836540222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,6144,0.12479999661445618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,8192,0.14153599739074707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,7168,0.1250240057706833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,6144,0.12803199887275696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,5120,0.10860799998044968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,5120,0.10620799660682678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,6144,0.11007999628782272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,4096,0.08950400352478027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,5120,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,4096,0.08748800307512283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,3584,0.07926400005817413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,3584,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,4096,0.07715199887752533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,3072,0.07132799923419952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,3072,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,3584,0.07094399631023407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,2560,0.06067200005054474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,2560,0.06201599910855293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,65536,1.1813759803771973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,3072,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,2048,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,2560,0.054207999259233475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,1536,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,2048,0.0522879995405674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,1536,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,2048,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,1024,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,1024,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,1024,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,768,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,768,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,768,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,512,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,256,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,512,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,512,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,256,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,128,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,256,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,128,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,64,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,128,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,64,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,1536,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,64,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,65536,1.032863974571228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,7168,32,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,7168,32,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,7168,32,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,12288,0.19366399943828583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,16384,0.24380800127983093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,12288,0.1884160041809082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,16384,0.2414720058441162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,10240,0.1592639982700348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,12288,0.20403200387954712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,16384,0.2667520046234131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,10240,0.164000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,65536,0.9329919815063477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,8192,0.1343040019273758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,7168,0.12201599776744843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,10240,0.17443199455738068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,8192,0.13196800649166107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,7168,0.11606399714946747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,8192,0.14124800264835358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,6144,0.1088000014424324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,6144,0.09852799773216248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,5120,0.08374399691820145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,5120,0.09574399888515472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,7168,0.12460800260305405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,6144,0.10966400057077408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,4096,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,5120,0.09331200271844864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,3584,0.06454399973154068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,4096,0.07088000327348709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,3584,0.06735999882221222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,4096,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,3072,0.05567999929189682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,3072,0.06672000139951706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,3584,0.07011199742555618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,2560,0.048608001321554184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,65536,0.8815039992332458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,3072,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,2560,0.048608001321554184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,2560,0.05363199859857559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,2048,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,2048,0.04307200014591217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,1536,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,2048,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,1536,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,1536,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,1024,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,1024,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,768,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,1024,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,768,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,768,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,512,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,512,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,512,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,256,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,256,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,128,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,256,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,128,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,65536,1.0325119495391846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,64,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,128,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,64,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,6144,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,6144,32,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,6144,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,12288,0.15785600244998932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,12288,0.16025599837303162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,16384,0.2056960016489029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,16384,0.2030400037765503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,16384,0.2513920068740845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,12288,0.1937599927186966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,10240,0.13894400000572205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,10240,0.13728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,65536,0.7319679856300354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,8192,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,8192,0.11724799871444702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,7168,0.10310400277376175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,7168,0.10345599800348282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,6144,0.09414400160312653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,8192,0.12595200538635254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,7168,0.11369600147008896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,10240,0.16364799439907074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,6144,0.09286399930715561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,5120,0.09126400202512741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,65536,0.758080005645752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,5120,0.08150400221347809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,6144,0.09913600236177444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,4096,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,5120,0.08409599959850311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,4096,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,3584,0.06252799928188324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,3584,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,4096,0.06825599819421768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,3072,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,3072,0.05491200089454651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,3584,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,2560,0.04838399961590767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,2560,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,3072,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,2560,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,2048,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,2048,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,1536,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,1536,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,1024,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,2048,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,1536,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,1024,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,768,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,65536,1.0239039659500122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,1024,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,768,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,512,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,768,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,512,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,512,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,256,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,256,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,256,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,128,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,128,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,128,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,64,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,64,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,5120,32,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,5120,32,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,5120,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,16384,0.16908800601959229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,12288,0.12150400131940842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,16384,0.1576640009880066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,12288,0.12319999933242798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,16384,0.1377599984407425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,10240,0.10367999970912933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,10240,0.10620799660682678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,12288,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,10240,0.08985599875450134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,8192,0.08486399799585342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,8192,0.09008000046014786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,7168,0.07628799974918365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,7168,0.07760000228881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,8192,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,6144,0.06697600334882736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,7168,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,6144,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,5120,0.05907199904322624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,5120,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,6144,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,65536,0.6043840050697327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,4096,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,65536,0.5942720174789429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,4096,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,5120,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,3584,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,4096,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,3584,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,3072,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,3584,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,3072,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,3072,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,2560,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,2560,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,2560,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,2048,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,2048,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,2048,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,1536,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,1536,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,1024,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,65536,0.5190399885177612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,1536,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,1024,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,1024,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,768,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,768,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,512,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,768,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,512,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,256,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,256,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,128,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,128,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,128,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,64,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,4096,32,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,4096,32,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,16384,0.1584320068359375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,4096,256,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,12288,0.12144000083208084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,16384,0.16025599837303162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,12288,0.12336000055074692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,16384,0.13862399756908417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,10240,0.10396800190210342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,12288,0.10556799918413162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,10240,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,8192,0.08601599931716919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,10240,0.08972799777984619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,8192,0.08671999722719193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,7168,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,8192,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,7168,0.07836800068616867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,7168,0.06560000032186508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,6144,0.06726399809122086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,6144,0.06790400296449661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,65536,0.6146240234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,5120,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,5120,0.05958399921655655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,6144,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,4096,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,5120,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,65536,0.5983679890632629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,4096,0.049056001007556915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,3584,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,4096,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,3584,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,3072,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,3584,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,3072,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,65536,0.5192000269889832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,2560,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,3072,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,2560,0.035392001271247864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,2048,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,2560,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,2048,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,1536,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,2048,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,1024,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,1536,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,1536,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,1024,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,1024,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,768,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,768,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,768,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,512,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,512,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,512,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,256,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,256,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,256,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,128,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3584,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3584,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3584,32,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,12288,0.11974400281906128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,12288,0.1165120005607605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,16384,0.150176003575325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,16384,0.1544319987297058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,16384,0.13779200613498688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,12288,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,10240,0.10601600259542465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,10240,0.10943999886512756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,8192,0.09100800007581711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,8192,0.08579199761152267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,10240,0.08963199704885483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,7168,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,8192,0.07347200065851212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,7168,0.08019199967384338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,6144,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,6144,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,7168,0.06511999666690826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,65536,0.5619199872016907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,5120,0.05830400064587593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,6144,0.05801599845290184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,4096,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,5120,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,65536,0.5644479990005493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,3584,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,4096,0.04931199923157692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,3584,0.04460800066590309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,4096,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,3584,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,3072,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,3072,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,5120,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,65536,0.5193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,2560,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,2560,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,3072,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,2048,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,2560,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,2048,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,1536,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,1536,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,1024,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,2048,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,1536,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,1024,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,1024,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,768,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,768,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,768,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,512,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,512,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,512,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,256,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,256,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,128,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,3072,32,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,64,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,3072,32,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,3072,32,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,12288,0.12095999717712402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,12288,0.10342399775981903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,16384,0.12771199643611908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,16384,0.13212800025939941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,16384,0.13728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,12288,0.10476800054311752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,10240,0.0894400030374527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,10240,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,8192,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,8192,0.07401599735021591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,10240,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,65536,0.598143994808197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,8192,0.06969600170850754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,7168,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,6144,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,7168,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,6144,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,7168,0.06313599646091461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,5120,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,65536,0.46959999203681946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,6144,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,4096,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,5120,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,4096,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,5120,0.04745600000023842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,3584,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,4096,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,3584,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,3072,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,3584,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,3072,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,65536,0.519648015499115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,2560,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,3072,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,2560,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,2560,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,2048,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,2048,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,2048,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,1536,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,1536,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,1024,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,1536,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,1024,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,1024,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,768,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,768,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,768,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,512,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,512,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,256,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,512,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,256,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,128,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,64,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,64,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2560,32,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2560,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2560,32,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,16384,0.08614400029182434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,12288,0.06963200122117996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,12288,0.06729599833488464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,16384,0.08915200084447861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,16384,0.10268799960613251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,12288,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,10240,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,10240,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,10240,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,8192,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,8192,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,65536,0.4557119905948639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,7168,0.05615999922156334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,8192,0.053408000618219376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,7168,0.07222399860620499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,6144,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,7168,0.047807998955249786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,6144,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,5120,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,6144,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,5120,0.052191998809576035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,65536,0.3932799994945526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,5120,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,4096,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,4096,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,4096,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,3584,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,3584,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,3584,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,65536,0.40351998805999756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,3072,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,3072,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,3072,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,2560,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,2560,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,2560,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,2048,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,2048,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,2048,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,1536,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,1536,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,1536,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,1024,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,1024,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,1024,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,768,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,768,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,2048,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,64,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,2048,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,2048,32,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,12288,0.06457599997520447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,12288,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,16384,0.07852800190448761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,16384,0.0798719972372055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,16384,0.08028800040483475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,10240,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,12288,0.06233600154519081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,10240,0.05942400172352791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,8192,0.05724800005555153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,8192,0.04761600121855736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,10240,0.053279999643564224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,65536,0.2412479966878891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,8192,0.04416000097990036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,7168,0.04966399818658829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,7168,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,6144,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,65536,0.24531200528144836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,7168,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,6144,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,6144,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,5120,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,5120,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,4096,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,4096,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,5120,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,4096,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,3584,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,65536,0.33456000685691833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,3584,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,3584,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,3072,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,3072,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,2560,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,2048,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,2560,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,2048,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,1536,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,2560,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,1536,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,1536,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,1024,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,768,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,768,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,2048,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1536,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1536,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1536,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,12288,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,12288,0.05158400163054466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,16384,0.06224000081419945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,16384,0.06598400324583054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,16384,0.05696000158786774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,12288,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,10240,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,10240,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,8192,0.038943998515605927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,10240,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,7168,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,8192,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,7168,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,8192,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,65536,0.2011519968509674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,6144,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,6144,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,5120,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,5120,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,65536,0.20396800339221954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,6144,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,5120,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,4096,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,4096,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,65536,0.2033279985189438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,3584,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,3584,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,3072,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,3072,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,2560,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,3072,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,2560,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,2048,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,2048,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,1536,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,1536,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,1024,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,768,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,1024,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,768,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,1024,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,1024,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,1024,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,12288,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,16384,0.05289600044488907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,16384,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,12288,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,16384,0.05603199824690819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,10240,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,10240,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,8192,0.04732799902558327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,65536,0.14870400726795197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,10240,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,65536,0.15644800662994385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,12288,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,8192,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,8192,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,7168,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,7168,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,7168,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,6144,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,6144,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,5120,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,65536,0.20316800475120544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,6144,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,5120,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,4096,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,4096,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,5120,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,3584,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,3072,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,3584,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,2560,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,3072,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,2560,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,2048,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,2048,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,1536,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,1536,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,1024,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,1024,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,768,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,768,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,768,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,12288,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,12288,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,16384,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,16384,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,16384,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,12288,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,10240,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,10240,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,10240,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,8192,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,8192,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,65536,0.12694400548934937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,7168,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,65536,0.1687999963760376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,6144,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,7168,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,7168,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,6144,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,5120,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,6144,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,5120,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,5120,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,4096,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,65536,0.11820799857378006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,3584,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,3072,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,3584,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,2560,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,3072,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,2560,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,2048,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,2048,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,2048,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,1536,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,1024,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,768,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,512,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,512,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,512,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,12288,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,16384,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,16384,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,16384,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,12288,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,12288,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,10240,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,8192,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,8192,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,10240,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,10240,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,65536,0.09305600076913834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,7168,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,8192,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,7168,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,6144,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,7168,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,65536,0.08556800335645676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,6144,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,65536,0.1167680025100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,5120,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,5120,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,5120,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,4096,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,3584,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,3072,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,2048,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,1536,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,1024,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,256,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,256,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,256,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,12288,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,16384,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,16384,0.035392001271247864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,16384,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,12288,0.04243199899792671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,10240,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,10240,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,8192,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,8192,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,10240,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,8192,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,7168,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,65536,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,65536,0.07539200037717819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,7168,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,6144,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,6144,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,5120,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,5120,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,65536,0.11747200042009354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,4096,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,3584,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,3072,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,3072,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,3584,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,2560,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,6144,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,2048,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,1536,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,1024,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,128,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,128,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,128,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,12288,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,12288,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,16384,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,16384,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,16384,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,12288,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,10240,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,10240,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,10240,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,8192,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,65536,0.07503999769687653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,7168,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,65536,0.07823999971151352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,8192,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,7168,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,6144,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,6144,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,6144,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,5120,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,5120,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,65536,0.11670400202274323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,3584,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,2560,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,3072,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,2048,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,2560,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,1536,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,1024,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,768,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,64,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,64,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,12288,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,16384,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,16384,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,16384,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,64,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,12288,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,12288,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,10240,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,10240,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,8192,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,65536,0.07846400141716003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,10240,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,8192,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,8192,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,65536,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,7168,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,7168,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,6144,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,6144,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,7168,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,65536,0.11289600282907486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,6144,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,5120,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,5120,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,4096,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,4096,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,3584,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,2560,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,3072,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,2048,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,1536,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,768,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1024,32,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1024,32,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1024,32,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,10240,1.1613759994506836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,12288,1.3911679983139038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,10240,1.1444799900054932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,12288,1.3761600255966187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,8192,0.9402880072593689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,8192,0.9192320108413696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,16384,1.8359359502792358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,7168,0.8284159898757935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,7168,0.8101760149002075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,16384,1.8047679662704468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,6144,0.720255970954895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,12288,1.206015944480896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,10240,1.0119359493255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,5120,0.602400004863739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,5120,0.5907840132713318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,4096,0.49718400835990906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,6144,0.700767993927002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,16384,1.5733120441436768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,4096,0.4864000082015991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,8192,0.8224319815635681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,7168,0.7227839827537537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,3584,0.4399360120296478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,6144,0.6325119733810425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,3584,0.42825600504875183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,5120,0.5370240211486816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,3072,0.3455039858818054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,4096,0.44067201018333435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,3072,0.6667199730873108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,2560,0.3219200074672699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,2560,0.327455997467041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,2048,0.2685759961605072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,1536,0.21571199595928192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,3584,0.3962880074977875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,2048,0.2775680124759674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,2560,0.29996800422668457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,3072,0.66348797082901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,1024,0.15769599378108978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,1536,0.2125760018825531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,768,0.12732799351215363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,2048,0.2507520020008087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,1024,0.155008003115654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,768,0.13203200697898865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,512,0.08819200098514557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,512,0.19548800587654114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,1024,0.15510399639606476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,256,0.058240000158548355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,256,0.06102399900555611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,1536,0.20294399559497833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,768,0.13417600095272064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,512,0.1098880022764206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,128,0.04787199944257736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,128,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,64,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,256,0.08233600109815598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,64,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,128,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,65536,32,0.06095999851822853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,65536,32,0.06015999987721443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,64,0.058880001306533813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,65536,32,0.05990400165319443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,12288,0.3447360098361969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,16384,0.46428799629211426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,12288,0.30326399207115173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,10240,0.293503999710083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,12288,0.35471999645233154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,16384,0.4623680114746094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,10240,0.2998400032520294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,8192,0.23676800727844238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,65536,1.7915199995040894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,8192,0.24320000410079956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,16384,0.3984319865703583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,10240,0.2577599883079529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,7168,0.21395200490951538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,7168,0.2128639966249466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,6144,0.18316799402236938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,8192,0.2099200040102005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,6144,0.18483200669288635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,7168,0.18585599958896637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,5120,0.15891200304031372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,5120,0.1555519998073578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,4096,0.1305599957704544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,4096,0.12809599936008453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,6144,0.16179199516773224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,3584,0.11654400080442429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,5120,0.13846400380134583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,3584,0.1151999980211258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,4096,0.11340799927711487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,3072,0.10198400169610977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,3072,0.10025600343942642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,2560,0.08595199882984161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,2560,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,3584,0.1034879982471466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,3072,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,2048,0.07433599978685379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,2048,0.07289600372314453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,2560,0.07964800298213959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,1536,0.05926400050520897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,2048,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,1024,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,1536,0.059167999774217606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,1024,0.0987199991941452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,1024,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,1536,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,768,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,768,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,512,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,512,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,768,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,256,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,512,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,256,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,256,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,128,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,128,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,128,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,64,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,64,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,64,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,32,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,16384,32,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,32,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,16384,65536,1.777184009552002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,16384,0.3563840091228485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,12288,0.27430400252342224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,16384,0.3492799997329712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,65536,1.3328319787979126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,16384,0.39747199416160583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,12288,0.271807998418808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,10240,0.22934399545192719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,12288,0.3033280074596405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,10240,0.23535999655723572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,16384,65536,1.546015977859497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,8192,0.1887039989233017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,8192,0.1929280012845993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,10240,0.25571200251579285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,7168,0.17219200730323792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,8192,0.2083519995212555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,7168,0.1706240028142929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,6144,0.3986560106277466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,6144,0.14959999918937683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,5120,0.12969599664211273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,7168,0.1855040043592453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,5120,0.13849599659442902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,4096,0.10864000022411346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,6144,0.16204799711704254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,5120,0.13792000710964203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,3584,0.09827200323343277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,4096,0.10835199803113937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,4096,0.11296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,3584,0.10003200173377991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,3072,0.08777599781751633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,3072,0.08985599875450134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,3584,0.10220800340175629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,3072,0.08991999924182892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,2560,0.07862400263547897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,2560,0.0769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,65536,1.3080960512161255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,2048,0.0628800019621849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,2048,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,2048,0.0671359971165657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,1536,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,2560,0.07846400141716003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,1536,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,1536,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,1024,0.07327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,1024,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,1024,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,768,0.05801599845290184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,768,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,512,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,768,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,512,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,256,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,256,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,512,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,256,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,128,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,128,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,128,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,64,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,64,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,12288,32,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,12288,32,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,64,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,32,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,12288,0.23440000414848328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,12288,65536,1.533568024635315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,12288,0.2401919960975647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,16384,0.5464000105857849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,16384,0.315744012594223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,10240,0.20073600113391876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,65536,1.1974079608917236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,16384,0.26707199215888977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,12288,0.20636799931526184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,10240,0.20319999754428864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,65536,1.1868799924850464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,8192,0.16252799332141876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,7168,0.14419199526309967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,10240,0.17267200350761414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,7168,0.24668799340724945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,8192,0.16550399363040924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,8192,0.14047999680042267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,6144,0.12723200023174286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,5120,0.11107199639081955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,6144,0.1273919939994812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,5120,0.10771200060844421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,4096,0.0888959988951683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,4096,0.08902399986982346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,7168,0.12611199915409088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,6144,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,3584,0.12812800705432892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,5120,0.09379199892282486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,3584,0.08179199695587158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,3072,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,4096,0.07737600058317184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,3072,0.07129599899053574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,3072,0.06268800050020218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,3584,0.07132799923419952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,2560,0.0613120011985302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,2560,0.14297600090503693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,2048,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,2048,0.05222399905323982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,2560,0.05510399863123894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,2048,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,1536,0.09030400216579437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,1536,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,1024,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,1024,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,1536,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,768,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,1024,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,768,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,512,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,512,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,512,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,256,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,256,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,128,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,256,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,128,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,128,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,64,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,64,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,64,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,32,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,10240,32,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,10240,768,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,32,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,12288,0.19167999923229218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,16384,0.2417919933795929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,16384,0.24825599789619446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,10240,65536,1.0327039957046509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,12288,0.18931199610233307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,10240,0.16624000668525696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,10240,0.16300800442695618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,16384,0.2693760097026825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,12288,0.2054080069065094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,8192,0.1348160058259964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,8192,0.13420799374580383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,10240,0.17350399494171143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,65536,0.8776320219039917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,7168,0.12214399874210358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,6144,0.10547199845314026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,7168,0.12115199863910675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,8192,0.14207999408245087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,5120,0.09462399780750275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,7168,0.12585599720478058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,6144,0.10851199924945831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,5120,0.09363199770450592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,6144,0.10992000252008438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,4096,0.08051200211048126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,5120,0.09372799843549728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,3584,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,4096,0.08073599636554718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,4096,0.07782399654388428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,3584,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,65536,0.8949120044708252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,3072,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,3072,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,3584,0.06969600170850754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,3072,0.06195199862122536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,2560,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,2560,0.05897599831223488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,2048,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,2048,0.050944000482559204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,2560,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,2048,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,1536,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,1536,0.033504001796245575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,1024,0.04806400090456009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,1024,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,1536,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,768,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,1024,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,768,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,512,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,768,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,512,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,512,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,256,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,256,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,128,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,256,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,128,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,128,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,64,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,64,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,64,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,8192,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,8192,32,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,32,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,8192,65536,1.032480001449585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,12288,0.16806399822235107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,12288,0.16684800386428833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,16384,0.21667200326919556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,16384,0.21478399634361267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,16384,0.26873600482940674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,10240,0.14524799585342407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,10240,0.14803199470043182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,65536,0.7901759743690491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,12288,0.20336000621318817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,8192,0.12156800180673599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,65536,0.7725120186805725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,10240,0.17324799299240112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,8192,0.12124799937009811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,7168,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,8192,0.14025600254535675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,6144,0.09539200365543365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,6144,0.09622400254011154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,7168,0.12515200674533844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,5120,0.08499199897050858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,6144,0.11027199774980545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,5120,0.08499199897050858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,4096,0.07168000191450119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,4096,0.07254400104284286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,5120,0.0931520015001297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,3584,0.06643199920654297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,3584,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,4096,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,7168,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,3072,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,2560,0.052480001002550125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,3584,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,2560,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,3072,0.06070400029420853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,3072,0.06176000088453293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,2560,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,2048,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,2048,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,1536,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,1536,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,2048,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,1024,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,1536,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,1024,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,768,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,1024,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,768,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,512,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,768,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,512,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,512,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,256,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,256,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,256,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,65536,1.0241600275039673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,128,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,128,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,128,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,64,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,64,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,7168,32,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,7168,32,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,7168,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,12288,0.14361600577831268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,12288,0.17235200107097626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,16384,0.18905599415302277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,16384,0.18559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,10240,0.12508800625801086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,10240,0.16198399662971497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,12288,0.20348800718784332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,16384,0.26688000559806824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,8192,0.1029760017991066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,65536,0.6575999855995178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,7168,0.09363199770450592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,8192,0.2706240117549896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,10240,0.17318400740623474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,8192,0.14028799533843994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,6144,0.10249599814414978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,6144,0.08230400085449219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,7168,0.11689600348472595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,5120,0.07353600114583969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,6144,0.10950399935245514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,7168,0.1252480000257492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,4096,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,5120,0.08684799820184708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,4096,0.07116799801588058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,5120,0.09299200028181076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,3584,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,65536,0.6827840209007263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,3584,0.06454399973154068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,4096,0.07676800340414047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,3584,0.06998399645090103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,3072,0.1072319969534874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,3072,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,2560,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,2560,0.04867200180888176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,3072,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,2048,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,2560,0.053568001836538315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,2048,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,2048,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,1536,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,1536,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,1024,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,1024,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,1536,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,1024,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,768,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,768,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,512,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,768,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,512,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,65536,1.032256007194519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,512,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,256,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,256,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,256,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,128,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,128,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,64,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,128,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,64,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,6144,32,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,6144,32,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,16384,0.29289600253105164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,12288,0.12179200351238251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,6144,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,16384,0.1626880019903183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,12288,0.12323199957609177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,16384,0.22844800353050232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,10240,0.12995199859142303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,12288,0.1618880033493042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,10240,0.1870719939470291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,65536,0.6154239773750305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,8192,0.1504960060119629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,10240,0.14051200449466705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,65536,0.599232017993927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,8192,0.08723200112581253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,8192,0.10921599715948105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,7168,0.13651199638843536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,6144,0.06694400310516357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,7168,0.07756800204515457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,6144,0.08243200182914734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,7168,0.10492800176143646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,5120,0.05843200162053108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,5120,0.07820799946784973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,4096,0.04864000156521797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,4096,0.04940799996256828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,5120,0.059039998799562454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,6144,0.09206400066614151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,4096,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,3584,0.044319998472929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,3584,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,3072,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,3072,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,3584,0.05132799968123436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,3072,0.044704001396894455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,2560,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,2560,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,2048,0.06390400230884552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,2048,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,2560,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,2048,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,1536,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,1536,0.05129599943757057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,1536,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,1024,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,1024,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,1024,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,768,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,768,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,768,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,512,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,65536,0.5193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,512,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,256,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,256,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,256,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,128,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,128,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,5120,32,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,5120,32,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,5120,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,12288,0.12329600006341934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,16384,0.15801599621772766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,12288,0.12131199985742569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,16384,0.13625599443912506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,16384,0.16275200247764587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,12288,0.10579200088977814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,10240,0.10358399897813797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,10240,0.10627199709415436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,8192,0.08636800199747086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,65536,0.6069759726524353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,10240,0.08956799656152725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,65536,0.602944016456604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,8192,0.08796799927949905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,8192,0.07315199822187424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,7168,0.07699199765920639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,6144,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,6144,0.07039999961853027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,7168,0.07897599786520004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,6144,0.05724800005555153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,7168,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,5120,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,5120,0.05993599817156792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,5120,0.049536000937223434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,4096,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,4096,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,3584,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,4096,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,3584,0.04531199857592583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,3072,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,3584,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,3072,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,2560,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,2560,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,2048,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,2560,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,2048,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,2048,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,1536,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,1536,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,1536,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,1024,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,1024,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,768,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,1024,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,65536,0.5190719962120056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,3072,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,768,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,512,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,768,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,512,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,512,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,256,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,256,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,128,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,128,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,4096,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,64,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,4096,32,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,4096,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,16384,0.16147199273109436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,12288,0.12028799951076508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,12288,0.12326399981975555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,16384,0.15750400722026825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,16384,0.1379839926958084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,10240,0.10339199751615524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,12288,0.10604800283908844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,10240,0.10623999685049057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,8192,0.08460800349712372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,10240,0.0899839997291565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,65536,0.593887984752655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,7168,0.07571200281381607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,8192,0.0872960016131401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,8192,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,6144,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,7168,0.07718399912118912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,6144,0.06707199662923813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,7168,0.06569600105285645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,5120,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,6144,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,65536,0.6121280193328857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,4096,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,4096,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,5120,0.059039998799562454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,4096,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,5120,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,3584,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,3584,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,3072,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,3072,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,3584,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,2560,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,2560,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,3072,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,2560,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,2048,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,2048,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,1536,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,1536,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,1024,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,2048,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,1024,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,65536,0.5206080079078674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,1536,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,1024,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,768,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,512,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,768,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,512,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,768,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,256,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,512,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,256,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,256,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,128,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,128,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,64,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3584,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3584,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3584,32,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,12288,0.09043200314044952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,16384,0.15811200439929962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,16384,0.11939200013875961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,16384,0.13660800457000732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,12288,0.10425599664449692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,10240,0.1069440022110939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,10240,0.08115199953317642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,12288,0.12422399967908859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,8192,0.08879999816417694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,8192,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,10240,0.08899199962615967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,7168,0.07609599828720093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,8192,0.07295999675989151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,7168,0.07667200267314911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,6144,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,6144,0.06739199906587601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,7168,0.06534399837255478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,65536,0.4275520145893097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,65536,0.43347200751304626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,6144,0.05753599852323532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,5120,0.05843200162053108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,5120,0.0586559996008873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,4096,0.048608001321554184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,4096,0.04956800118088722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,5120,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,3584,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,4096,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,3584,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,3072,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,3072,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,3584,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,65536,0.5189440250396729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,2560,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,2560,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,3072,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,2048,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,2560,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,2048,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,1536,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,2048,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,1024,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,1536,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,1024,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,768,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,768,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,768,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,1024,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,512,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,512,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,512,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,128,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,256,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,128,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,64,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,3072,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,3072,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,3072,32,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,12288,0.07782399654388428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,16384,0.10371199995279312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,12288,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,16384,0.09571199864149094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,16384,0.13702400028705597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,10240,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,12288,0.10729599744081497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,10240,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,10240,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,8192,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,8192,0.05708799883723259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,65536,0.38947200775146484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,8192,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,7168,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,65536,0.2933120131492615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,7168,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,6144,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,7168,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,6144,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,5120,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,6144,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,5120,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,4096,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,5120,0.04614400118589401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,4096,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,3584,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,3072,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,3584,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,4096,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,3584,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,3072,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,3072,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,2560,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,65536,0.5209280252456665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,2560,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,2048,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,2560,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,2048,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,2048,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,1536,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,1536,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,1024,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,1024,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,768,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,768,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,256,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,64,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2560,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2560,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,32,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2560,1536,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,12288,0.06940799951553345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,12288,0.06896000355482101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,16384,0.08550400286912918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,16384,0.09030400216579437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,16384,0.09763199836015701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,10240,0.05936000123620033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,10240,0.0589120015501976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,12288,0.07516799867153168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,8192,0.06976000219583511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,10240,0.06473600119352341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,65536,0.7360640168190002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,7168,0.04287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,65536,0.24329599738121033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,8192,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,8192,0.052319999784231186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,7168,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,6144,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,6144,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,7168,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,5120,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,5120,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,6144,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,4096,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,4096,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,5120,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,4096,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,3584,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,3584,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,3584,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,65536,0.4063679873943329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,3072,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,3072,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,2560,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,2560,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,2560,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,2048,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,2048,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,2048,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,1536,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,1536,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,1536,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,1024,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,1024,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,1024,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,64,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,2048,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,2048,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,2048,32,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,12288,0.05852799862623215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,12288,0.05766399949789047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,16384,0.10400000214576721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,16384,0.06947200000286102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,16384,0.08051200211048126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,12288,0.06217600032687187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,10240,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,10240,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,8192,0.0562559999525547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,10240,0.053279999643564224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,8192,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,65536,0.18883199989795685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,7168,0.043327998369932175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,65536,0.33500799536705017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,65536,0.3041920065879822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,6144,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,7168,0.05145600065588951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,6144,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,5120,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,7168,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,6144,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,5120,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,5120,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,4096,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,4096,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,8192,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,3584,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,3584,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,4096,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,3072,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,3584,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,3072,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,2560,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,3072,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,2560,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,2048,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,2560,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,2048,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,1536,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,1536,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,2048,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,1536,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,1024,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,1024,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,64,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1536,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1536,32,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1536,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,12288,0.043616000562906265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,12288,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,16384,0.0997759997844696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,16384,0.05478399991989136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,16384,0.05663999915122986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,12288,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,10240,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,10240,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,10240,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,8192,0.051552001386880875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,8192,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,65536,0.20272000133991241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,7168,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,7168,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,8192,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,7168,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,65536,0.15804800391197205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,65536,0.15756799280643463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,6144,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,6144,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,5120,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,5120,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,6144,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,5120,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,4096,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,4096,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,3584,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,3584,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,3072,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,3072,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,2560,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,2560,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,2048,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,2048,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,1536,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,1536,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,1024,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,1024,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,1024,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,12288,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,16384,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,16384,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,1024,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,16384,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,12288,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,12288,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,10240,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,10240,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,10240,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,8192,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,8192,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,7168,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,8192,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,65536,0.18825599551200867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,65536,0.19228799641132355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,6144,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,7168,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,7168,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,6144,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,5120,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,6144,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,5120,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,65536,0.20175999402999878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,4096,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,4096,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,5120,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,3584,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,3584,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,4096,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,3072,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,3072,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,2560,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,2560,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,2048,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,2048,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,1024,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,1024,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,1024,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,768,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,768,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,768,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,12288,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,12288,0.036959998309612274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,16384,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,16384,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,16384,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,12288,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,10240,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,10240,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,8192,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,10240,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,8192,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,65536,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,65536,0.1329279989004135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,7168,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,8192,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,7168,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,7168,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,6144,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,6144,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,5120,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,6144,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,5120,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,65536,0.11606399714946747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,5120,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,3584,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,3584,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,3072,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,3072,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,2560,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,2560,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,2560,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,2048,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,1536,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,4096,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,512,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,512,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,512,32,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,12288,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,12288,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,16384,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,16384,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,12288,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,16384,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,10240,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,10240,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,8192,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,10240,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,7168,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,65536,0.07571200281381607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,65536,0.07747200131416321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,8192,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,7168,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,7168,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,6144,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,6144,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,5120,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,5120,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,4096,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,65536,0.11667200177907944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,4096,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,3584,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,3072,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,2560,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,2560,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,2560,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,1024,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,256,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,256,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,256,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,12288,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,16384,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,16384,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,16384,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,12288,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,10240,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,10240,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,12288,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,65536,0.0652799978852272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,8192,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,8192,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,8192,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,7168,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,6144,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,65536,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,7168,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,6144,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,7168,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,6144,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,5120,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,5120,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,5120,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,4096,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,65536,0.11670400202274323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,3584,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,2560,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,2048,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,1536,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,1024,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,64,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,128,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,128,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,128,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,12288,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,12288,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,16384,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,16384,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,16384,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,12288,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,10240,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,10240,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,10240,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,8192,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,8192,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,8192,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,7168,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,7168,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,65536,0.0708480030298233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,7168,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,65536,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,5120,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,5120,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,6144,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,4096,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,5120,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,4096,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,65536,0.11664000153541565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,3584,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,3584,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,2560,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,2048,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,1536,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,64,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,64,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,64,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,64,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,12288,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,12288,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,16384,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,16384,0.05075199902057648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,12288,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,10240,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,10240,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,8192,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,65536,0.06630399823188782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,65536,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,7168,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,8192,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,7168,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,6144,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,7168,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,5120,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,65536,0.11184000223875046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,4096,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,5120,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,4096,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,3584,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,3072,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,2560,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,2048,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,512,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,768,32,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,768,32,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,768,32,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,16384,1.2246079444885254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,10240,0.7578880190849304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,8192,0.6137599945068359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,12288,0.9297599792480469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,12288,0.9054080247879028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,10240,0.781216025352478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,12288,0.8071680068969727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,7168,0.5423679947853088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,16384,1.2087359428405762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,8192,0.631488025188446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,10240,0.6741120219230652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,6144,0.4676479995250702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,16384,1.062175989151001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,8192,0.5501440167427063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,7168,0.5566080212593079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,6144,0.4821760058403015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,5120,0.39846399426460266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,4096,0.331712007522583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,4096,0.3270080089569092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,5120,0.40668800473213196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,7168,0.489439994096756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,6144,0.4227199852466583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,3072,0.25679999589920044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,3584,0.29023998975753784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,3584,0.2949120104312897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,4096,0.2959679961204529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,5120,0.36028799414634705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,2560,0.21379199624061584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,2560,0.22092799842357635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,3072,0.25248000025749207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,3584,0.2669439911842346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,2048,0.1777919977903366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,1536,0.14060799777507782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,3072,0.2319359928369522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,2048,0.17801600694656372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,1536,0.14323200285434723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,2560,0.2014079988002777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,2048,0.16947199404239655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,1024,0.09827200323343277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,1024,0.09961599856615067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,1536,0.13814400136470795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,768,0.081216000020504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,1024,0.10745599865913391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,512,0.05920000001788139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,768,0.07715199887752533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,512,0.05734400078654289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,256,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,768,0.0920960009098053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,256,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,512,0.07631999999284744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,128,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,128,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,256,0.05929600074887276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,64,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,128,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,65536,32,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,32,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,65536,64,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,64,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,65536,32,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,12288,0.2380480021238327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,12288,0.23321600258350372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,16384,0.31385600566864014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,10240,0.2035840004682541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,16384,0.3091199994087219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,16384,0.26895999908447266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,10240,0.19894400238990784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,12288,0.20604799687862396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,8192,0.16076800227165222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,65536,1.214624047279358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,8192,0.16518400609493256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,7168,0.14524799585342407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,10240,0.17500799894332886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,7168,0.1443839967250824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,8192,0.1422400027513504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,6144,0.1268160045146942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,6144,0.12415999919176102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,65536,1.1785600185394287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,5120,0.10665600001811981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,7168,0.12691199779510498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,6144,0.1106560006737709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,4096,0.0896959975361824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,5120,0.10857599973678589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,4096,0.0894400030374527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,5120,0.0944959968328476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,3584,0.07926400005817413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,3584,0.08111999928951263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,4096,0.07897599786520004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,3072,0.07171200215816498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,3584,0.07097599655389786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,3072,0.07094399631023407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,2560,0.0607680007815361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,3072,0.06304000318050385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,2560,0.06124800071120262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,2048,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,2560,0.05462399870157242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,2048,0.050464000552892685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,1536,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,2048,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,1536,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,1024,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,1024,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,1536,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,768,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,768,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,1024,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,768,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,512,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,512,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,256,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,256,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,512,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,128,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,128,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,128,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,64,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,64,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,64,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,16384,32,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,16384,32,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,256,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,16384,65536,1.0295039415359497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,16384,0.29715201258659363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,12288,0.2242240011692047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,12288,0.18515199422836304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,16384,0.24076800048351288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,10240,0.1615999937057495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,16384,0.2693119943141937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,10240,0.160863995552063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,12288,0.20761600136756897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,8192,0.13225600123405457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,8192,0.13036799430847168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,7168,0.13715200126171112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,7168,0.11929599940776825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,10240,0.1720000058412552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,65536,0.8717120289802551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,8192,0.14076800644397736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,6144,0.10134399682283401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,6144,0.10300800204277039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,7168,0.12601600587368011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,5120,0.08668799698352814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,5120,0.08659200370311737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,6144,0.10972800105810165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,4096,0.06911999732255936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,3584,0.06351999938488007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,5120,0.09391999989748001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,65536,0.9068160057067871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,3584,0.06358399987220764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,4096,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,4096,0.07798399776220322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,3072,0.05632000043988228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,3584,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,3072,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,2560,0.05939200147986412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,2560,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,3072,0.06239999830722809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,2560,0.0544000007212162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,2048,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,2048,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,1536,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,2048,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,1536,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,1024,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,1536,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,1024,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,1024,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,768,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,768,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,768,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,512,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,512,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,65536,1.0324480533599854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,512,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,256,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,256,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,256,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,128,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,128,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,64,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,128,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,64,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,12288,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,12288,32,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,12288,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,12288,0.16016000509262085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,12288,0.15673600137233734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,16384,0.24617600440979004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,16384,0.20259200036525726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,10240,0.13900800049304962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,10240,0.13753600418567657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,16384,0.2559039890766144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,12288,0.20457600057125092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,8192,0.11395200341939926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,10240,0.16864000260829926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,65536,0.9272639751434326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,8192,0.11548800021409988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,7168,0.10406400263309479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,8192,0.13212800025939941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,7168,0.10518400371074677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,6144,0.09062399715185165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,6144,0.09142400324344635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,7168,0.11961600184440613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,5120,0.08134400099515915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,5120,0.08150400221347809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,6144,0.10502400249242783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,4096,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,5120,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,3584,0.06281600147485733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,4096,0.07135999947786331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,3584,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,3072,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,3072,0.05558399856090546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,3584,0.06460800021886826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,3072,0.055743999779224396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,2560,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,4096,0.06844799965620041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,65536,0.7319679856300354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,2560,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,2048,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,2048,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,2560,0.048608001321554184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,1536,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,2048,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,1536,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,1024,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,1024,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,1536,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,1024,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,768,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,768,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,512,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,512,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,768,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,256,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,512,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,256,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,128,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,128,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,65536,1.0408960580825806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,256,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,64,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,128,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,64,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,10240,32,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,10240,32,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,10240,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,12288,0.12432000041007996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,12288,0.1215360015630722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,16384,0.15891200304031372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,16384,0.16230399906635284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,10240,0.10374400019645691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,10240,0.11177600175142288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,16384,0.13708800077438354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,12288,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,8192,0.0867839977145195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,65536,0.605023980140686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,8192,0.08585599809885025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,7168,0.07587199658155441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,10240,0.08988799899816513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,8192,0.07344000041484833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,7168,0.07980799674987793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,6144,0.066880002617836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,6144,0.06787200272083282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,7168,0.06566400080919266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,5120,0.057920001447200775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,6144,0.05804799869656563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,5120,0.058368001133203506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,4096,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,5120,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,4096,0.05119999870657921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,3584,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,3584,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,4096,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,3072,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,3072,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,3584,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,2560,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,2560,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,65536,0.5991039872169495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,3072,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,2048,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,2048,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,2560,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,1536,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,2048,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,1536,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,1536,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,1024,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,1024,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,1024,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,65536,0.5153599977493286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,768,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,768,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,768,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,512,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,512,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,256,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,512,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,256,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,256,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,128,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,128,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,64,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,8192,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,8192,32,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,16384,0.15772800147533417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,16384,0.16092799603939056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,8192,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,12288,0.12083200365304947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,12288,0.12316799908876419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,16384,0.13814400136470795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,10240,0.1093439981341362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,12288,0.10476800054311752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,10240,0.10371199995279312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,8192,0.08643200248479843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,8192,0.08921600133180618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,10240,0.09030400216579437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,7168,0.0767040029168129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,7168,0.08012799918651581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,8192,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,6144,0.06703999638557434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,65536,0.6070719957351685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,7168,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,5120,0.05859199911355972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,6144,0.06771200150251389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,65536,0.5988159775733948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,5120,0.05926400050520897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,6144,0.0578560009598732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,4096,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,4096,0.05023999884724617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,5120,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,3584,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,3584,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,4096,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,3584,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,3072,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,3072,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,65536,0.5194560289382935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,3072,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,2560,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,2560,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,2048,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,2560,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,2048,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,2048,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,1536,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,1536,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,1536,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,1024,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,1024,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,768,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,1024,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,768,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,512,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,768,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,512,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,512,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,256,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,128,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,128,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,128,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,64,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,7168,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,7168,32,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,7168,32,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,16384,0.15199999511241913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,12288,0.12198399752378464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,12288,0.12425599992275238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,16384,0.15455999970436096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,16384,0.13715200126171112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,10240,0.10099200159311295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,12288,0.10492800176143646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,10240,0.10342399775981903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,65536,0.6116799712181091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,65536,0.5419840216636658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,8192,0.08787199854850769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,8192,0.0859839990735054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,10240,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,7168,0.08089599758386612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,8192,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,6144,0.06716799736022949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,7168,0.08128000050783157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,6144,0.06761600077152252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,7168,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,6144,0.058079998940229416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,5120,0.061055999249219894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,5120,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,4096,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,5120,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,4096,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,4096,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,3584,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,3584,0.04447999969124794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,3072,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,3584,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,3072,0.039423998445272446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,65536,0.5194560289382935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,3072,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,2560,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,2560,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,2048,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,2560,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,2048,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,1536,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,1536,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,1024,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,1024,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,768,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,1024,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,768,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,512,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,768,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,512,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,2048,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,256,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,256,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,128,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,128,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,6144,32,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,64,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,6144,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,6144,32,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,12288,0.10070399940013885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,16384,0.15779200196266174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,16384,0.12966400384902954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,12288,0.10313600301742554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,16384,0.137472003698349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,10240,0.1035199984908104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,12288,0.1058880016207695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,10240,0.0888959988951683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,8192,0.08473599702119827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,65536,0.6105920076370239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,10240,0.08972799777984619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,8192,0.0737600028514862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,8192,0.06921599805355072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,7168,0.07612799853086472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,7168,0.06790400296449661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,7168,0.06377600133419037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,6144,0.05897599831223488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,6144,0.060127999633550644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,65536,0.45824000239372253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,5120,0.05788800120353699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,5120,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,6144,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,4096,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,4096,0.044863998889923096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,5120,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,3584,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,65536,0.5214719772338867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,3584,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,4096,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,3072,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,3584,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,3072,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,2560,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,3072,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,2560,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,2560,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,2048,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,2048,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,1536,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,2048,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,1024,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,1536,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,1536,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,1024,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,1024,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,768,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,768,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,768,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,512,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,512,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,512,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,256,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,256,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,64,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,5120,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,5120,32,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,5120,32,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,16384,0.09708800166845322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,16384,0.10470400005578995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,12288,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,16384,0.10655999928712845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,10240,0.06886400282382965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,12288,0.07900799810886383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,10240,0.06956800073385239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,12288,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,10240,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,8192,0.04761600121855736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,8192,0.06841599941253662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,8192,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,65536,0.3250240087509155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,7168,0.052639998495578766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,7168,0.04447999969124794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,6144,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,7168,0.04806400090456009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,65536,0.3240639865398407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,6144,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,65536,0.40537598729133606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,5120,0.04460800066590309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,6144,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,5120,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,4096,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,5120,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,4096,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,3584,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,4096,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,3584,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,3072,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,3072,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,3584,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,2560,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,3072,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,2560,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,2560,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,2048,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,2048,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,1536,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,2048,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,1536,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,1536,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,1024,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,1024,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,1024,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,768,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,768,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,512,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,512,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,256,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,128,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,64,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,4096,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,4096,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,4096,32,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,12288,0.07251200079917908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,16384,0.0907839983701706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,12288,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,16384,0.08928000181913376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,16384,0.09801600128412247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,12288,0.07619199901819229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,10240,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,10240,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,10240,0.07241600006818771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,65536,0.35926398634910583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,8192,0.04870399832725525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,65536,0.290367990732193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,8192,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,7168,0.05337600037455559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,7168,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,8192,0.052671998739242554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,6144,0.04912000149488449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,7168,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,6144,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,5120,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,6144,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,5120,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,4096,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,5120,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,4096,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,65536,0.3816959857940674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,4096,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,3584,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,3584,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,3584,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,3072,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,3072,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,3072,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,2560,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,2560,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,2560,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,2048,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,2048,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,2048,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,1536,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,1536,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,1536,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,1024,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,768,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,1024,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,768,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3584,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3584,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3584,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,16384,0.10320000350475311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,12288,0.06460800021886826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,12288,0.0822720006108284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,16384,0.08134400099515915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,16384,0.08662399649620056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,10240,0.06710399687290192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,12288,0.0658240020275116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,10240,0.059007998555898666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,10240,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,8192,0.05619199946522713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,65536,0.24480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,8192,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,7168,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,8192,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,7168,0.044415999203920364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,6144,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,6144,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,5120,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,65536,0.2409600019454956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,7168,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,6144,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,5120,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,4096,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,4096,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,5120,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,65536,0.3251839876174927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,3584,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,4096,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,3584,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,3584,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,3072,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,2560,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,3072,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,2560,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,3072,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,2560,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,2048,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,2048,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,2048,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,1536,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,1024,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,1536,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,1536,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,1024,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,768,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,512,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,3072,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,3072,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,3072,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,12288,0.07923199981451035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,16384,0.10278400033712387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,12288,0.06022400036454201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,16384,0.07494399696588516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,16384,0.07843200117349625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,12288,0.06224000081419945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,10240,0.05536000058054924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,10240,0.05488000065088272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,8192,0.055904000997543335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,10240,0.05321599915623665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,8192,0.04947200044989586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,7168,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,7168,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,8192,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,65536,0.23721599578857422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,65536,0.26153600215911865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,6144,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,6144,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,5120,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,6144,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,7168,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,5120,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,4096,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,4096,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,4096,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,65536,0.31279999017715454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,3584,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,3584,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,3584,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,3072,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,3072,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,2560,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,5120,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,2560,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,3072,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,2560,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,2048,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,2048,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,2048,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,1536,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,1536,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,1024,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,768,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,1024,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,1536,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,1024,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,512,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2560,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2560,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2560,32,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,16384,0.09200000017881393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,12288,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,12288,0.05215999856591225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,16384,0.06323199719190598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,16384,0.05619199946522713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,12288,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,10240,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,10240,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,10240,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,8192,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,65536,0.19945600628852844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,8192,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,65536,0.2268799990415573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,7168,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,8192,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,7168,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,6144,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,7168,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,6144,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,5120,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,5120,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,65536,0.20192000269889832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,4096,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,4096,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,3584,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,3584,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,3584,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,3072,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,3072,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,2560,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,2560,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,2048,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,2048,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,1536,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,1024,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,768,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,2048,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,2048,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,12288,0.05283199995756149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,16384,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,2048,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,16384,0.05276799947023392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,12288,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,16384,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,10240,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,12288,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,10240,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,10240,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,65536,0.15619200468063354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,8192,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,65536,0.14403200149536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,7168,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,7168,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,8192,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,6144,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,7168,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,6144,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,5120,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,5120,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,5120,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,4096,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,65536,0.20147199928760529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,3584,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,3584,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,3072,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,3072,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,2560,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,2560,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,2048,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,1536,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,2048,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,1536,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,1024,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,512,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,512,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1536,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1536,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1536,32,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,12288,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,12288,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,16384,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,16384,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,16384,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,12288,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,10240,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,10240,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,8192,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,10240,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,8192,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,7168,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,7168,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,8192,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,65536,0.1424960047006607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,7168,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,65536,0.12009599804878235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,6144,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,5120,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,6144,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,6144,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,5120,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,5120,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,4096,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,4096,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,65536,0.11680000275373459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,3584,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,3584,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,4096,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,3072,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,3584,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,3072,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,2560,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,2560,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,2048,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,1536,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,2048,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,1536,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,1024,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,1024,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,1024,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,12288,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,12288,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,16384,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,16384,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,16384,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,12288,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,10240,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,10240,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,8192,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,10240,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,8192,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,65536,0.09363199770450592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,8192,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,7168,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,65536,0.1029760017991066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,7168,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,6144,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,7168,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,6144,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,5120,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,5120,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,65536,0.11631999909877777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,4096,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,4096,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,5120,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,3584,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,3584,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,3072,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,3072,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,2560,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,2560,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,1536,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,2048,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,1024,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,768,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,768,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,768,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,12288,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,16384,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,16384,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,16384,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,12288,0.04233599826693535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,10240,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,10240,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,8192,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,12288,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,65536,0.08403199911117554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,8192,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,8192,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,65536,0.0950080007314682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,7168,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,7168,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,6144,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,6144,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,7168,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,65536,0.11606399714946747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,5120,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,6144,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,5120,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,4096,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,5120,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,3584,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,3072,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,2560,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,2048,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,1536,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,512,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,512,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,512,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,12288,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,12288,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,16384,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,16384,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,12288,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,16384,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,10240,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,10240,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,8192,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,10240,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,7168,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,65536,0.07843200117349625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,7168,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,65536,0.06729599833488464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,6144,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,7168,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,6144,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,5120,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,5120,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,6144,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,5120,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,4096,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,3584,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,65536,0.11673600226640701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,3584,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,3584,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,3072,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,3072,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,2560,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,2048,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,1536,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,256,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,256,32,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,256,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,12288,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,16384,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,12288,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,16384,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,16384,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,10240,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,12288,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,10240,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,8192,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,10240,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,65536,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,7168,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,65536,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,8192,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,8192,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,7168,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,7168,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,5120,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,6144,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,6144,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,5120,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,5120,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,4096,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,4096,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,65536,0.1173119992017746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,4096,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,3584,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,3072,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,2560,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,2048,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,1536,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,128,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,128,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,128,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,12288,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,12288,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,16384,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,16384,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,16384,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,12288,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,10240,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,10240,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,8192,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,8192,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,65536,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,65536,0.05510399863123894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,8192,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,6144,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,7168,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,7168,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,6144,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,6144,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,5120,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,5120,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,65536,0.11628799885511398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,3584,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,4096,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,7168,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,3584,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,3072,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,2560,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,768,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,64,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,64,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,64,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,64,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,12288,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,12288,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,16384,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,16384,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,16384,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,10240,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,12288,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,8192,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,10240,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,8192,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,8192,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,65536,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,65536,0.05276799947023392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,7168,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,7168,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,6144,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,7168,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,6144,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,5120,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,5120,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,65536,0.11244799941778183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,4096,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,3584,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,2560,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,3072,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,2560,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,1024,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,64,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,512,32,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,512,32,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,512,32,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,12288,1.1602879762649536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,16384,1.5551680326461792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,10240,0.9431679844856262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,10240,0.8661119937896729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,8192,0.7651839852333069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,8192,0.7530239820480347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,7168,1.184000015258789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,12288,1.211967945098877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,12288,0.8001599907875061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,6144,0.6086400151252747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,7168,0.667680025100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,10240,0.5101119875907898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,16384,1.4607360363006592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,8192,0.4168640077114105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,6144,0.5844799876213074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,4096,0.4216960072517395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,5120,0.49929600954055786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,16384,0.7968000173568726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,7168,0.4818879961967468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,4096,0.22307200729846954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,5120,0.4739840030670166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,4096,0.416128009557724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,3584,0.36825600266456604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,3072,0.30534398555755615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,3072,0.3370879888534546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,6144,0.321727991104126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,3584,0.2889919877052307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,5120,0.2720640003681183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,2560,0.2563839852809906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,2048,0.384768009185791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,3584,0.26134398579597473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,2560,0.24780799448490143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,3072,0.23161600530147552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,1536,0.159743994474411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,1536,0.29712000489234924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,2048,0.20758399367332458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,1024,0.1913599967956543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,2048,0.12966400384902954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,2560,0.2009280025959015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,768,0.147039994597435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,1024,0.1839040070772171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,1536,0.1372160017490387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,768,0.13683199882507324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,1024,0.08396799862384796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,512,0.0974079966545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,512,0.1013759970664978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,128,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,768,0.07231999933719635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,256,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,256,0.04992000013589859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,512,0.060447998344898224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,128,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,256,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,128,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,64,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,64,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,65536,32,0.04358400031924248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,64,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,65536,32,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,65536,32,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,12288,0.28812798857688904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,12288,0.23574399948120117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,16384,0.37436801195144653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,16384,0.30959999561309814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,16384,0.2685759961605072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,10240,0.25094398856163025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,10240,0.20003199577331543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,12288,0.20425599813461304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,8192,0.1932159960269928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,8192,0.16198399662971497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,10240,0.17283199727535248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,65536,1.1787840127944946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,7168,0.17868800461292267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,7168,0.17267200350761414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,8192,0.1425279974937439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,6144,0.1391039937734604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,6144,0.14627200365066528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,5120,0.10992000252008438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,7168,0.1263359934091568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,5120,0.10665600001811981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,6144,0.1098560020327568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,4096,0.08848000317811966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,5120,0.09391999989748001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,65536,1.4915200471878052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,4096,0.09884800016880035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,3584,0.07913599908351898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,4096,0.07788799703121185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,3584,0.09087999910116196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,3072,0.0793600007891655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,3072,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,2560,0.06063999980688095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,2560,0.05427199974656105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,2560,0.12012799829244614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,3072,0.06323199719190598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,2048,0.09583999961614609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,2048,0.052032001316547394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,1536,0.07433599978685379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,1536,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,2048,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,1024,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,1536,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,1024,0.04700800031423569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,768,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,1024,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,768,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,512,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,3584,0.07030399888753891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,768,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,512,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,256,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,256,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,512,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,256,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,128,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,128,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,128,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,64,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,64,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,65536,1.0331840515136719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,64,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,16384,32,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,16384,32,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,16384,32,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,16384,0.3035520017147064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,16384,0.29788801074028015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,12288,0.23839999735355377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,12288,0.24022400379180908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,10240,0.1984959989786148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,16384,0.266400009393692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,10240,0.2022079974412918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,12288,0.20943999290466309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,8192,0.26611199975013733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,65536,1.452672004699707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,10240,0.17455999553203583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,8192,0.2850880026817322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,7168,0.15644800662994385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,65536,1.1747839450836182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,6144,0.13087999820709229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,7168,0.15248000621795654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,8192,0.14083200693130493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,6144,0.20377600193023682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,5120,0.17452800273895264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,7168,0.12534399330615997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,5120,0.16463999450206757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,6144,0.11033599823713303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,4096,0.14319999516010284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,5120,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,4096,0.14582400023937225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,3584,0.1345919966697693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,4096,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,3584,0.13209599256515503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,3072,0.10953599959611893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,3072,0.1117440015077591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,2560,0.09247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,3584,0.06911999732255936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,2560,0.0907839983701706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,3072,0.06220800057053566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,2048,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,2048,0.07347200065851212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,2560,0.05350400134921074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,1536,0.05567999929189682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,1536,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,2048,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,1536,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,1024,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,1024,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,768,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,768,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,1024,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,768,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,512,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,512,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,256,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,512,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,256,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,256,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,128,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,128,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,128,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,64,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,64,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,12288,32,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,12288,32,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,12288,65536,1.0325759649276733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,12288,0.20943999290466309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,16384,0.26339200139045715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,16384,0.26022401452064514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,12288,0.17372800409793854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,10240,0.1783359944820404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,10240,0.1751679927110672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,12288,0.20975999534130096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,16384,0.2176000028848648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,8192,0.1581760048866272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,8192,0.14883199334144592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,65536,0.9298239946365356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,7168,0.13052800297737122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,10240,0.09126400202512741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,8192,0.11388800293207169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,7168,0.14086399972438812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,6144,0.1130559965968132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,6144,0.11151999980211258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,5120,0.10249599814414978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,6144,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,7168,0.10159999877214432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,5120,0.10358399897813797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,4096,0.05878400057554245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,4096,0.08511999994516373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,5120,0.08646400272846222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,3584,0.05289600044488907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,3584,0.07519999891519547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,4096,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,3072,0.06659200042486191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,3072,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,3584,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,2560,0.08313599973917007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,3072,0.05558399856090546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,2560,0.08396799862384796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,2048,0.06838399916887283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,2048,0.0655359998345375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,65536,0.9755200147628784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,2560,0.04243199899792671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,1536,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,2048,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,1536,0.05084799975156784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,1536,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,1024,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,1024,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,1024,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,768,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,768,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,768,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,512,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,512,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,512,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,65536,1.029695987701416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,256,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,256,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,256,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,128,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,128,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,128,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,64,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,64,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,10240,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,10240,32,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,10240,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,12288,0.14876799285411835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,12288,0.12144000083208084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,16384,0.15756799280643463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,16384,0.1621759980916977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,12288,0.10476800054311752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,10240,0.1143679991364479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,16384,0.13651199638843536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,10240,0.11833599954843521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,8192,0.0862400010228157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,10240,0.0894400030374527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,8192,0.17715199291706085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,65536,0.771776020526886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,7168,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,8192,0.07321599870920181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,7168,0.08166400343179703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,65536,0.594111979007721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,6144,0.06745599955320358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,6144,0.07606399804353714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,6144,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,7168,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,5120,0.05923200026154518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,5120,0.04992000013589859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,5120,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,4096,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,4096,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,3584,0.08656000345945358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,3584,0.044319998472929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,4096,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,3072,0.04287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,3584,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,3072,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,2560,0.06560000032186508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,3072,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,2560,0.059776000678539276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,2560,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,2048,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,2048,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,1536,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,1536,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,2048,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,1536,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,1024,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,1024,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,768,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,768,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,768,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,65536,0.5193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,512,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,512,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,512,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,256,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,256,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,128,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,256,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,128,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,64,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,128,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,8192,32,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,64,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,32,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,8192,1024,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,8192,32,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,12288,0.13846400380134583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,12288,0.12111999839544296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,16384,0.17865599691867828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,16384,0.1584639996290207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,16384,0.13728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,10240,0.10451199859380722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,12288,0.10592000186443329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,10240,0.1061440035700798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,8192,0.08575999736785889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,10240,0.08963199704885483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,7168,0.0761599987745285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,8192,0.09379199892282486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,65536,0.5952320098876953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,8192,0.07347200065851212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,7168,0.08416000008583069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,7168,0.06592000275850296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,6144,0.11935999989509583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,6144,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,5120,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,5120,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,65536,0.7123519778251648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,4096,0.09699200093746185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,4096,0.04912000149488449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,6144,0.05721599981188774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,5120,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,3584,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,3584,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,3072,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,4096,0.041919998824596405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,3072,0.0679360032081604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,3584,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,3072,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,2560,0.05920000001788139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,2560,0.058880001306533813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,2048,0.046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,2560,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,2048,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,2048,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,1536,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,1536,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,1536,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,1024,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,65536,0.5157439708709717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,1024,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,1024,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,768,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,768,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,512,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,512,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,768,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,256,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,512,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,256,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,128,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,128,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,256,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,128,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,64,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,7168,32,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,7168,32,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,7168,32,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,12288,0.14441600441932678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,12288,0.12156800180673599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,16384,0.19225600361824036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,16384,0.11859200149774551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,10240,0.10275200009346008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,16384,0.13609600067138672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,12288,0.10454399883747101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,10240,0.0809599980711937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,8192,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,8192,0.1581439971923828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,8192,0.07292799651622772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,65536,0.616159975528717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,10240,0.08899199962615967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,7168,0.13728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,6144,0.11593600362539291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,6144,0.11622399836778641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,7168,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,6144,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,5120,0.10729599744081497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,5120,0.05913599953055382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,5120,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,4096,0.04838399961590767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,4096,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,3584,0.08006399869918823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,7168,0.07622399926185608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,65536,0.7382720112800598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,4096,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,3584,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,3072,0.06684800237417221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,3584,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,3072,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,2560,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,2560,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,3072,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,65536,0.5192639827728271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,2048,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,2560,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,2048,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,1536,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,1536,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,2048,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,1024,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,1024,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,1536,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,768,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,1024,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,768,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,512,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,768,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,512,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,512,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,256,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,256,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,128,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,256,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,128,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,64,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,64,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,6144,32,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,6144,32,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,6144,32,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,12288,0.12454400211572647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,12288,0.07999999821186066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,16384,0.10480000078678131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,16384,0.15772800147533417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,10240,0.10678400099277496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,16384,0.13817599415779114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,10240,0.06924799829721451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,12288,0.10463999956846237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,10240,0.08972799777984619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,8192,0.056832000613212585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,8192,0.08499199897050858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,7168,0.07388799637556076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,8192,0.05894400179386139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,65536,0.6011199951171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,65536,0.63155198097229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,7168,0.051552001386880875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,6144,0.04588799923658371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,6144,0.06867200136184692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,5120,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,7168,0.05283199995756149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,5120,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,6144,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,4096,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,5120,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,4096,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,3584,0.04614400118589401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,3584,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,4096,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,3072,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,3584,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,3072,0.058079998940229416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,2560,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,3072,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,2560,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,2560,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,2048,0.040672000497579575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,2048,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,2048,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,65536,0.5195519924163818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,1536,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,1536,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,1024,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,1536,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,1024,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,1024,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,768,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,768,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,512,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,768,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,512,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,512,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,256,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,256,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,128,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,128,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,64,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,5120,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,5120,32,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,12288,0.1353279948234558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,16384,0.11971200257539749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,5120,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,16384,0.1693439930677414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,12288,0.09251199662685394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,16384,0.09942399710416794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,10240,0.081727996468544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,12288,0.07558400183916092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,10240,0.08569599688053131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,65536,0.39001598954200745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,10240,0.07155200093984604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,8192,0.09459199756383896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,8192,0.09136000275611877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,65536,0.6463680267333984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,7168,0.06777600198984146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,65536,0.4039680063724518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,7168,0.08217599987983704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,8192,0.052799999713897705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,6144,0.05951999872922897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,6144,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,7168,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,5120,0.0607680007815361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,5120,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,4096,0.05152000114321709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,6144,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,5120,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,4096,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,3584,0.04495999962091446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,3584,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,4096,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,3072,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,3584,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,3072,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,2560,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,3072,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,2560,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,2560,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,2048,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,1536,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,2048,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,2048,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,1536,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,1536,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,1024,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,1024,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,768,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,1024,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,768,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,768,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,512,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,512,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,256,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,128,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,64,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,4096,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,4096,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,4096,32,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,12288,0.0873280018568039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,12288,0.12428800016641617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,16384,0.08953599631786346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,16384,0.10924799740314484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,12288,0.0748480036854744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,16384,0.09785600006580353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,10240,0.1093439981341362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,10240,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,8192,0.06812799721956253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,10240,0.06457599997520447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,8192,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,7168,0.0814720019698143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,8192,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,65536,0.5814080238342285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,7168,0.08249600231647491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,7168,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,6144,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,6144,0.07366400212049484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,6144,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,5120,0.06496000289916992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,65536,0.3412480056285858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,5120,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,5120,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,4096,0.04787199944257736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,4096,0.05129599943757057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,4096,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,3584,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,65536,0.3630400002002716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,3584,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,3072,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,3072,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,3072,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,2560,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,2560,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,2048,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,2560,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,2048,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,2048,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,1536,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,1536,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,1536,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,1024,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,1024,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,1024,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,768,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,3584,0.04636799916625023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,768,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,768,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,512,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,256,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3584,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3584,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3584,32,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,12288,0.08086399734020233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,16384,0.08057600259780884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,16384,0.09171199798583984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,12288,0.07673600316047668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,16384,0.07971200346946716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,12288,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,10240,0.09920000284910202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,10240,0.06911999732255936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,10240,0.05423999950289726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,8192,0.08524800091981888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,7168,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,8192,0.057760000228881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,8192,0.04335999861359596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,65536,0.2874560058116913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,7168,0.0769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,65536,0.2964479923248291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,6144,0.06803199648857117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,7168,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,6144,0.06697600334882736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,6144,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,5120,0.055135998874902725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,5120,0.05539200082421303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,5120,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,4096,0.04636799916625023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,4096,0.04566400125622749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,4096,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,3584,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,3584,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,3072,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,65536,0.305184006690979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,3584,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,3072,0.0390079990029335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,3072,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,2560,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,2560,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,2560,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,2048,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,2048,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,1536,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,2048,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,1536,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,1024,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,1536,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,1024,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,768,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,768,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,512,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,64,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,3072,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,3072,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,3072,32,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,12288,0.06083200126886368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,12288,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,16384,0.06169600039720535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,16384,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,10240,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,12288,0.05212799832224846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,10240,0.06003199890255928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,10240,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,8192,0.07638400048017502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,8192,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,16384,0.07814399898052216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,65536,0.2346239984035492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,7168,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,8192,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,7168,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,6144,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,65536,0.24489599466323853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,7168,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,6144,0.0589120015501976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,5120,0.05129599943757057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,6144,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,5120,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,4096,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,5120,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,4096,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,3584,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,65536,0.23686400055885315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,3584,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,3072,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,3072,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,2560,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,2560,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,2048,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,2048,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,1536,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,1536,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,1536,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,1024,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,1024,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,768,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,256,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2560,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2560,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2560,32,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,12288,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,12288,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,16384,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,16384,0.12249600142240524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,16384,0.056063998490571976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,12288,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,10240,0.06764800101518631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,10240,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,10240,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,8192,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,8192,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,65536,0.15625600516796112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,8192,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,65536,0.23455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,7168,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,7168,0.04867200180888176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,7168,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,6144,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,6144,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,5120,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,6144,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,5120,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,5120,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,4096,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,4096,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,3584,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,65536,0.20345599949359894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,3584,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,3584,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,3072,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,3072,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,2560,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,2560,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,2048,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,2048,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,1536,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,1024,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,2048,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,2048,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,2048,1536,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,12288,0.05289600044488907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,12288,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,16384,0.06339199841022491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,16384,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,16384,0.05603199824690819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,12288,0.04416000097990036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,10240,0.04713600128889084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,10240,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,10240,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,8192,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,8192,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,8192,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,7168,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,7168,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,65536,0.17190399765968323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,6144,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,6144,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,5120,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,6144,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,5120,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,65536,0.16832000017166138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,4096,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,4096,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,3584,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,65536,0.20281599462032318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,3584,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,3072,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,3072,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,2560,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,3072,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,2560,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,2048,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,2048,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,1536,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,1536,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,1024,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,768,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1536,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1536,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1536,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,12288,0.037408001720905304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,12288,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,16384,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,16384,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,16384,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,12288,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,10240,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,10240,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,8192,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,65536,0.09164799749851227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,7168,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,8192,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,65536,0.13152000308036804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,7168,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,7168,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,5120,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,6144,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,6144,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,5120,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,10240,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,65536,0.11727999895811081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,4096,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,5120,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,4096,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,4096,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,3584,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,3072,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,3584,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,3072,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,2048,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,1536,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,2048,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,1536,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,1024,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,1024,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,1024,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,1024,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,16384,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,12288,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,16384,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,12288,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,16384,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,12288,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,10240,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,10240,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,8192,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,10240,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,8192,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,65536,0.1111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,8192,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,65536,0.08764799684286118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,7168,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,7168,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,7168,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,6144,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,6144,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,5120,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,5120,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,65536,0.11641599982976913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,6144,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,4096,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,5120,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,4096,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,3584,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,3584,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,3072,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,3072,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,2560,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,2048,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,768,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,768,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,768,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,12288,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,16384,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,16384,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,16384,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,12288,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,12288,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,10240,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,10240,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,8192,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,65536,0.07884799689054489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,10240,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,65536,0.07446400076150894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,8192,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,7168,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,8192,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,7168,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,6144,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,6144,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,7168,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,5120,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,5120,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,65536,0.1165120005607605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,5120,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,4096,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,3584,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,2560,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,3072,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,1536,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,1536,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,512,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,512,32,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,512,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,12288,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,12288,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,16384,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,16384,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,16384,0.05225599929690361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,12288,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,10240,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,10240,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,10240,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,8192,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,8192,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,7168,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,8192,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,65536,0.063680000603199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,65536,0.11574400216341019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,6144,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,6144,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,65536,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,7168,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,6144,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,5120,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,4096,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,5120,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,4096,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,3584,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,3072,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,2560,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,1536,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,1536,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,256,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,256,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,256,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,12288,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,12288,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,16384,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,16384,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,16384,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,10240,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,12288,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,10240,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,8192,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,65536,0.0639680027961731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,7168,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,8192,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,65536,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,8192,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,7168,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,7168,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,6144,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,6144,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,6144,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,4096,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,4096,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,65536,0.11641599982976913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,3584,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,3072,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,2560,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,2560,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,2048,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,1536,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,1536,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,128,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,128,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,128,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,12288,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,12288,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,16384,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,16384,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,16384,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,12288,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,10240,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,10240,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,8192,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,10240,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,65536,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,8192,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,7168,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,8192,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,7168,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,7168,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,6144,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,6144,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,65536,0.11635199934244156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,65536,0.047359999269247055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,4096,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,3072,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,3072,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,2560,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,3072,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,2560,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,1536,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,1536,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,64,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,64,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,64,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,64,32,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,12288,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,12288,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,16384,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,16384,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,16384,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,12288,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,10240,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,10240,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,65536,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,8192,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,65536,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,10240,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,8192,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,8192,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,7168,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,7168,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,6144,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,65536,0.11168000102043152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,6144,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,5120,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,4096,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,3584,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,3072,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,2560,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,2048,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,384,32,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,384,32,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,384,32,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,12288,0.4713599979877472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,10240,0.3967359960079193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,16384,0.6205440163612366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,12288,0.4657920002937317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,10240,0.39136001467704773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,12288,0.41334399580955505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,8192,0.32204800844192505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,8192,0.31676799058914185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,10240,0.3513279855251312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,16384,0.6100479960441589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,8192,0.2816320061683655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,7168,0.28492799401283264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,16384,0.5352960228919983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,6144,0.2433920055627823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,7168,0.28406399488449097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,6144,0.24534399807453156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,7168,0.2544960081577301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,4096,0.17049600183963776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,5120,0.20841600000858307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,5120,0.2110079973936081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,6144,0.22166399657726288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,5120,0.18857599794864655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,4096,0.17260800302028656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,3584,0.1536320000886917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,3584,0.15254400670528412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,3072,0.13398399949073792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,4096,0.15328000485897064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,2560,0.1151999980211258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,3072,0.13331200182437897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,3584,0.14262400567531586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,2560,0.1136000007390976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,2048,0.09372799843549728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,3072,0.1239359974861145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,2560,0.10995200276374817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,2048,0.09507200121879578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,1536,0.07523199915885925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,1536,0.07222399860620499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,2048,0.09110400080680847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,1024,0.05721599981188774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,1536,0.07619199901819229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,1024,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,768,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,768,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,1024,0.06003199890255928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,512,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,512,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,768,0.05190400034189224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,512,0.0432640016078949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,256,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,256,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,128,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,256,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,128,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,64,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,128,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,64,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,64,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,65536,32,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,65536,32,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,65536,32,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,16384,0.16844800114631653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,12288,0.1231359988451004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,16384,0.16070400178432465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,16384,0.13971200585365295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,10240,0.10582400113344193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,10240,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,12288,0.12479999661445618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,12288,0.10937599837779999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,65536,0.6507200002670288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,8192,0.08668799698352814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,8192,0.08780799806118011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,7168,0.07683199644088745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,10240,0.09328000247478485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,7168,0.0801599994301796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,8192,0.07552000135183334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,6144,0.06864000111818314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,6144,0.07039999961853027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,7168,0.06883200258016586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,5120,0.060416001826524734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,5120,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,6144,0.059647999703884125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,5120,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,4096,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,4096,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,3584,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,3584,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,4096,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,3072,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,3072,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,65536,0.59552001953125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,3584,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,2560,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,3072,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,2560,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,2048,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,2560,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,2048,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,65536,0.5183680057525635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,1536,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,2048,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,1536,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,1536,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,1024,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,1024,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,768,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,1024,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,768,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,768,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,512,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,256,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,512,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,256,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,256,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,128,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,128,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,64,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,16384,32,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,16384,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,16384,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,16384,0.1619199961423874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,12288,0.12591999769210815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,12288,0.11791999638080597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,16384,0.15081599354743958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,16384,0.138047993183136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,10240,0.10124800354242325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,10240,0.1056319996714592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,12288,0.10729599744081497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,65536,0.5644159913063049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,8192,0.08556800335645676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,10240,0.09008000046014786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,8192,0.08691199868917465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,7168,0.07791999727487564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,8192,0.07340800017118454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,7168,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,6144,0.07023999840021133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,7168,0.0660799965262413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,6144,0.06828799843788147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,5120,0.058400001376867294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,6144,0.058687999844551086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,5120,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,4096,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,4096,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,5120,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,3584,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,4096,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,3584,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,3072,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,3072,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,65536,0.5416960120201111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,3584,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,2560,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,3072,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,2560,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,65536,0.5180479884147644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,2560,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,2048,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,2048,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,2048,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,1536,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,1536,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,1536,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,1024,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,1024,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,768,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,1024,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,768,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,768,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,512,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,512,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,512,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,32,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,64,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,12288,32,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,12288,32,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,16384,0.1619199961423874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,16384,0.1287039965391159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,12288,0.10211200267076492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,16384,0.1372479945421219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,12288,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,12288,0.10294400155544281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,10240,0.08790399879217148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,12288,0.10678400099277496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,10240,0.08886399865150452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,8192,0.07395199686288834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,8192,0.07539200037717819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,10240,0.08988799899816513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,65536,0.45504000782966614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,8192,0.07049600034952164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,7168,0.06665600091218948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,6144,0.05984000116586685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,7168,0.06732799857854843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,7168,0.06329599767923355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,6144,0.059967998415231705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,5120,0.052191998809576035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,5120,0.05241600051522255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,6144,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,65536,0.47548800706863403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,4096,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,5120,0.04745600000023842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,4096,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,4096,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,3584,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,3584,0.04073600098490715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,3072,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,3072,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,3584,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,3072,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,65536,0.5156159996986389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,2560,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,2560,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,2048,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,1536,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,2048,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,2560,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,2048,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,1536,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,1536,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,1024,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,1024,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,1024,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,768,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,768,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,512,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,512,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,768,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,512,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,256,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,256,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,128,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,256,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,10240,32,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,64,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,10240,32,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,10240,32,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,12288,0.07676800340414047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,12288,0.07094399631023407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,16384,0.09123200178146362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,16384,0.10425599664449692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,16384,0.111455999314785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,10240,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,12288,0.09068799763917923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,10240,0.06003199890255928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,10240,0.08009599894285202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,8192,0.05984000116586685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,7168,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,8192,0.050464000552892685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,8192,0.06255999952554703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,7168,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,65536,0.3596479892730713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,7168,0.052960000932216644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,6144,0.055135998874902725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,6144,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,5120,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,6144,0.047040000557899475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,65536,0.3163520097732544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,4096,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,4096,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,5120,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,3584,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,4096,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,3584,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,3072,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,65536,0.42239999771118164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,3072,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,3584,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,2560,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,5120,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,3072,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,2560,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,2048,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,2560,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,2048,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,2048,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,1536,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,1536,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,1024,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,1536,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,768,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,768,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,512,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,512,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,256,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,8192,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,64,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,8192,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,8192,32,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,12288,0.07158400118350983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,12288,0.0695360004901886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,16384,0.08736000210046768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,16384,0.08777599781751633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,16384,0.10172799974679947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,12288,0.0843840017914772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,10240,0.060575999319553375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,10240,0.058848001062870026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,10240,0.07171200215816498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,8192,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,8192,0.07030399888753891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,7168,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,65536,0.2815360128879547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,8192,0.06032000109553337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,7168,0.06239999830722809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,6144,0.040031999349594116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,6144,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,7168,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,5120,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,5120,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,6144,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,4096,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,5120,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,4096,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,65536,0.3155199885368347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,3584,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,3584,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,4096,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,3584,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,3072,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,3072,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,2560,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,3072,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,2560,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,2560,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,2048,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,65536,0.4107840061187744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,2048,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,2048,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,1536,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,1536,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,1536,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,1024,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,1024,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,768,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,512,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,7168,32,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,7168,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,7168,32,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,12288,0.06684800237417221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,12288,0.0663359984755516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,16384,0.08022399991750717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,16384,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,16384,0.088128000497818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,12288,0.06601600348949432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,10240,0.06003199890255928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,10240,0.059487998485565186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,10240,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,8192,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,8192,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,7168,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,65536,0.24556800723075867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,8192,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,7168,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,6144,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,7168,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,6144,0.055135998874902725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,5120,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,5120,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,6144,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,4096,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,5120,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,4096,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,4096,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,3584,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,3584,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,65536,0.24089600145816803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,3072,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,3584,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,3072,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,65536,0.33635199069976807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,2560,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,2560,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,3072,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,2048,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,2048,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,1536,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,2560,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,2048,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,1536,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,1024,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,1536,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,1024,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,768,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,1024,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,768,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,512,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,256,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,6144,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,64,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,6144,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,6144,32,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,12288,0.08054400235414505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,12288,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,16384,0.10236799716949463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,16384,0.07305599749088287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,16384,0.08211199939250946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,12288,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,10240,0.06803199648857117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,10240,0.05612799897789955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,10240,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,8192,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,8192,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,7168,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,7168,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,8192,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,65536,0.24460799992084503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,6144,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,7168,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,5120,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,6144,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,5120,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,6144,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,65536,0.21465599536895752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,4096,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,5120,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,4096,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,3584,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,4096,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,3584,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,65536,0.31913599371910095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,3584,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,3072,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,3072,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,2560,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,2560,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,2048,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,3072,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,2048,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,2048,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,1536,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,1536,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,1536,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,1024,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,1024,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,768,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,768,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,512,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,256,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,2560,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,64,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,5120,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,5120,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,5120,32,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,12288,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,16384,0.08508799970149994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,16384,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,12288,0.051552001386880875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,16384,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,12288,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,10240,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,10240,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,10240,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,8192,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,7168,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,8192,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,65536,0.1823360025882721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,8192,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,7168,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,65536,0.19942399859428406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,7168,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,6144,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,6144,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,5120,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,6144,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,5120,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,5120,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,4096,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,3584,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,3584,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,65536,0.22726400196552277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,3072,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,3072,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,2560,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,2560,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,2048,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,2560,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,2048,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,1536,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,1536,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,512,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,64,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,4096,32,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,4096,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,4096,32,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,12288,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,16384,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,16384,0.06911999732255936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,16384,0.05692800134420395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,12288,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,10240,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,10240,0.05692800134420395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,8192,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,12288,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,10240,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,8192,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,7168,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,8192,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,7168,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,65536,0.16396799683570862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,7168,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,6144,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,65536,0.1801919937133789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,6144,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,5120,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,6144,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,5120,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,5120,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,4096,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,4096,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,3584,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,65536,0.2128639966249466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,3584,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,3072,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,3072,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,2048,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,2048,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,1024,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,512,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3584,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3584,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3584,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,12288,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,12288,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,16384,0.05347200110554695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,16384,0.06207999959588051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,16384,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,12288,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,10240,0.05164799839258194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,10240,0.05939200147986412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,10240,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,8192,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,8192,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,7168,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,65536,0.14582400023937225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,7168,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,6144,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,6144,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,7168,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,5120,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,6144,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,5120,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,65536,0.1475840061903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,4096,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,4096,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,3584,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,65536,0.20342400670051575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,5120,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,3584,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,3072,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,3072,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,2560,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,2560,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,2048,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,2048,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,1536,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,768,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,3072,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,3072,32,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,32,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,3072,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,12288,0.058240000158548355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,16384,0.0607680007815361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,16384,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,12288,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,16384,0.05603199824690819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,12288,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,10240,0.05276799947023392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,10240,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,8192,0.04774399846792221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,10240,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,8192,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,8192,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,65536,0.15459200739860535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,7168,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,7168,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,65536,0.13526399433612823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,6144,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,5120,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,6144,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,5120,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,6144,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,65536,0.20284800231456757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,4096,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,4096,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,3584,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,3584,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,5120,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,3072,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,2560,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,3072,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,2560,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,2048,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,1536,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,2048,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,1536,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,1024,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2560,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2560,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2560,32,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,12288,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,12288,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,16384,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,16384,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,16384,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,12288,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,10240,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,10240,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,10240,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,8192,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,8192,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,65536,0.12406399846076965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,7168,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,8192,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,7168,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,65536,0.13222399353981018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,7168,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,6144,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,5120,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,5120,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,5120,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,6144,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,4096,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,65536,0.12140800058841705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,4096,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,3584,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,6144,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,4096,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,3072,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,3584,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,3072,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,3072,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,2560,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,2048,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,1536,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,768,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,2048,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,2048,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,2048,32,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,12288,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,12288,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,16384,0.042847998440265656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,16384,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,16384,0.035551998764276505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,12288,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,10240,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,10240,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,10240,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,8192,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,8192,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,7168,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,8192,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,65536,0.1058880016207695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,7168,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,65536,0.09465599805116653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,7168,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,6144,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,6144,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,5120,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,6144,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,5120,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,4096,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,4096,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,65536,0.11961600184440613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,4096,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,3072,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,3584,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,3072,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,1024,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1536,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1536,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,12288,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,16384,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,16384,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1536,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,16384,0.05548800155520439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,12288,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,12288,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,10240,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,8192,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,10240,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,10240,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,8192,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,65536,0.07916799932718277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,65536,0.09808000177145004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,8192,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,7168,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,7168,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,7168,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,6144,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,5120,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,6144,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,5120,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,5120,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,4096,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,65536,0.1165120005607605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,4096,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,3584,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,3072,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,2560,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,2048,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,1024,32,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,1024,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,1024,32,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,12288,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,12288,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,16384,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,16384,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,16384,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,12288,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,10240,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,10240,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,8192,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,10240,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,8192,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,65536,0.06803199648857117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,7168,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,7168,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,65536,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,7168,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,6144,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,5120,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,65536,0.11718399822711945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,3584,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,4096,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,3584,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,3072,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,2560,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,1024,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,3072,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,768,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,768,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,768,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,12288,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,12288,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,16384,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,16384,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,16384,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,12288,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,10240,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,10240,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,8192,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,8192,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,7168,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,8192,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,7168,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,7168,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,65536,0.06675200164318085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,6144,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,65536,0.06374400109052658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,6144,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,4096,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,5120,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,6144,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,5120,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,4096,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,65536,0.11695999652147293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,3584,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,3584,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,3072,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,3072,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,2560,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,1536,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,512,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,512,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,512,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,12288,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,16384,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,16384,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,16384,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,12288,0.0416640006005764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,10240,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,10240,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,65536,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,12288,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,8192,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,10240,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,65536,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,8192,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,8192,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,7168,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,7168,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,6144,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,7168,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,6144,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,5120,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,65536,0.11641599982976913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,4096,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,3584,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,3584,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,2560,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,3072,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,2048,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,2560,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,1536,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,1024,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,256,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,256,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,256,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,12288,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,12288,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,16384,0.03478400036692619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,16384,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,16384,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,12288,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,10240,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,8192,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,8192,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,8192,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,65536,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,65536,0.04694399982690811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,7168,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,6144,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,7168,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,5120,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,6144,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,5120,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,65536,0.1162559986114502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,4096,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,4096,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,3584,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,3072,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,2048,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,1536,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,512,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,128,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,128,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,128,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,12288,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,16384,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,16384,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,16384,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,12288,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,10240,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,10240,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,10240,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,8192,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,65536,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,65536,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,8192,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,7168,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,7168,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,7168,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,6144,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,6144,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,5120,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,6144,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,5120,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,65536,0.14441600441932678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,4096,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,4096,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,3584,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,3584,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,3072,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,2560,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,2048,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,1536,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,64,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,64,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,64,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,64,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,64,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,12288,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,16384,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,12288,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,16384,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,16384,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,12288,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,10240,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,8192,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,10240,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,8192,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,65536,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,65536,0.04543999955058098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,7168,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,7168,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,6144,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,6144,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,5120,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,65536,0.1976960003376007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,8192,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,4096,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,5120,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,4096,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,3584,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,3584,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,3584,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,3072,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,2560,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,2560,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,2048,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,2048,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,2048,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,1536,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,256,32,32,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,256,32,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,256,32,32,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,12288,0.5490559935569763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,10240,0.45763200521469116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,16384,0.7299200296401978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,12288,0.6231039762496948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,10240,0.4153600037097931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,16384,0.6350719928741455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,8192,0.36323198676109314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,8192,0.3319680094718933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,8192,0.2808319926261902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,7168,0.34908801317214966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,12288,0.41414400935173035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,7168,0.31865599751472473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,7168,0.2502399981021881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,10240,0.34908801317214966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,5120,0.22867199778556824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,5120,0.22329600155353546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,6144,0.2568640112876892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,16384,0.5356159806251526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,6144,0.2581759989261627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,4096,0.17791999876499176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,4096,0.1815679967403412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,6144,0.2200320065021515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,3584,0.1539199948310852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,5120,0.18639999628067017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,3584,0.18409599363803864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,3072,0.13967999815940857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,4096,0.15196800231933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,3072,0.14441600441932678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,2560,0.12937599420547485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,3584,0.13817599415779114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,2048,0.1035199984908104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,2560,0.21251200139522552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,3072,0.12227199971675873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,2560,0.10649599879980087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,2048,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,2048,0.1635199934244156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,1536,0.13529600203037262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,1536,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,1024,0.08796799927949905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,1024,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,1536,0.07414399832487106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,768,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,768,0.052032001316547394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,512,0.04755200073122978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,1024,0.0570559985935688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,512,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,768,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,256,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,512,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,128,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,128,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,256,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,128,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,64,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,64,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,65536,32,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,65536,32,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,64,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,32,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,65536,256,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,16384,0.31833600997924805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,16384,0.1804800033569336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,12288,0.16140800714492798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,12288,0.23206399381160736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,16384,0.13900800049304962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,10240,0.19865599274635315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,12288,0.11100800335407257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,10240,0.12191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,8192,0.15459200739860535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,10240,0.091839998960495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,8192,0.08726400136947632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,65536,1.2451200485229492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,7168,0.07648000121116638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,7168,0.08137600123882294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,8192,0.07526399940252304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,6144,0.11939200013875961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,6144,0.07708799839019775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,7168,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,5120,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,5120,0.0663679987192154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,6144,0.060447998344898224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,4096,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,4096,0.050175998359918594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,5120,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,3584,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,3584,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,3584,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,4096,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,3072,0.06534399837255478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,3072,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,2560,0.05286400020122528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,2560,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,3072,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,2560,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,2048,0.04374400153756142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,2048,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,1536,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,2048,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,1536,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,65536,1.3817919492721558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,1536,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,1024,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,1024,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,768,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,1024,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,768,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,512,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,512,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,768,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,512,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,256,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,256,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,256,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,65536,0.5216320157051086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,128,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,128,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,64,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,128,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,64,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,64,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,16384,32,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,16384,32,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,16384,32,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,12288,0.13663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,16384,0.17027199268341064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,16384,0.1709119975566864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,12288,0.13689599931240082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,16384,0.13654400408267975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,10240,0.11935999989509583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,10240,0.11715199798345566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,12288,0.10876800119876862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,8192,0.095551997423172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,8192,0.09888000041246414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,10240,0.09113600105047226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,8192,0.07411199808120728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,7168,0.12822400033473969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,7168,0.07734400033950806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,65536,0.6200640201568604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,6144,0.10320000350475311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,5120,0.058720000088214874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,6144,0.06719999760389328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,7168,0.06691200286149979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,5120,0.059007998555898666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,6144,0.05814399942755699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,65536,0.6138880252838135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,4096,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,4096,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,5120,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,3584,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,4096,0.0424639992415905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,3584,0.04460800066590309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,3072,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,3072,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,2560,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,2560,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,3072,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,2560,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,2048,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,2048,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,2048,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,1536,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,1536,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,65536,0.5195199847221375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,1536,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,1024,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,1024,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,768,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,768,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,3584,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,1024,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,768,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,512,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,512,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,512,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,256,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,256,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,128,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,128,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,64,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,64,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,64,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,12288,32,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,12288,32,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,12288,32,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,12288,0.1249919980764389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,12288,0.11619199812412262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,16384,0.14630399644374847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,16384,0.14732800424098969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,16384,0.1375039964914322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,12288,0.10726399719715118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,10240,0.1024319976568222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,10240,0.09852799773216248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,8192,0.13276800513267517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,10240,0.09132800251245499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,65536,0.6114240288734436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,7168,0.07430399954319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,8192,0.08224000036716461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,8192,0.0719359964132309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,7168,0.11132799834012985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,6144,0.06518399715423584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,6144,0.0684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,65536,0.5224320292472839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,7168,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,6144,0.05852799862623215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,5120,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,5120,0.0846719965338707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,4096,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,4096,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,5120,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,4096,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,3584,0.048576001077890396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,3584,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,3584,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,3072,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,3072,0.052480001002550125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,2560,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,3072,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,2560,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,2048,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,2560,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,2048,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,1536,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,1536,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,2048,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,1536,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,65536,0.5157759785652161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,1024,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,1024,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,768,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,768,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,1024,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,768,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,512,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,512,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,512,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,256,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,256,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,256,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,128,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,128,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,64,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,64,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,10240,32,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,10240,32,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,10240,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,12288,0.09094399958848953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,12288,0.08966399729251862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,16384,0.11286400258541107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,16384,0.15996800363063812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,10240,0.0817599967122078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,12288,0.08841600269079208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,10240,0.10329599678516388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,8192,0.08089599758386612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,8192,0.07248000055551529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,10240,0.07286400347948074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,65536,0.43462398648262024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,8192,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,16384,0.1106560006737709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,7168,0.06444799900054932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,7168,0.08441600203514099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,6144,0.05798399820923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,65536,0.612608015537262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,6144,0.07023999840021133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,7168,0.049984000623226166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,6144,0.046592000871896744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,5120,0.052191998809576035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,4096,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,5120,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,4096,0.052352000027894974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,3584,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,5120,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,4096,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,3584,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,3072,0.04179200157523155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,3072,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,3584,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,2560,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,3072,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,2560,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,65536,0.4312320053577423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,2048,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,2560,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,2048,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,1536,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,2048,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,1536,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,1536,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,1024,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,1024,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,768,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,1024,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,768,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,512,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,768,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,512,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,512,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,256,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,256,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,64,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,8192,32,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,8192,32,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,8192,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,12288,0.08022399991750717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,12288,0.1120000034570694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,16384,0.08899199962615967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,16384,0.09641599655151367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,16384,0.0974079966545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,10240,0.09942399710416794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,12288,0.08659200370311737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,10240,0.06803199648857117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,8192,0.07903999835252762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,8192,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,10240,0.07401599735021591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,8192,0.05407999828457832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,7168,0.07459200173616409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,65536,0.2807359993457794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,7168,0.0721919983625412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,6144,0.06281600147485733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,65536,0.40857601165771484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,7168,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,6144,0.06358399987220764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,5120,0.05536000058054924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,6144,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,5120,0.05321599915623665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,4096,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,4096,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,65536,0.28729599714279175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,3584,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,5120,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,4096,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,3584,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,3584,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,3072,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,3072,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,2560,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,2560,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,3072,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,2560,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,2048,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,2048,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,1536,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,1536,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,2048,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,1024,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,1024,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,1024,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,768,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,768,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,768,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,512,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,256,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,128,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,128,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,7168,32,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,7168,32,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,32,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,7168,1536,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,12288,0.07849600166082382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,12288,0.07116799801588058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,16384,0.08166400343179703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,16384,0.10380800068378448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,16384,0.0814720019698143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,12288,0.06697600334882736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,10240,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,10240,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,10240,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,8192,0.06665600091218948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,8192,0.07056000083684921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,8192,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,65536,0.24713599681854248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,7168,0.04966399818658829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,7168,0.06812799721956253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,65536,0.24329599738121033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,6144,0.05209600180387497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,6144,0.06723199784755707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,7168,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,5120,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,5120,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,6144,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,5120,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,4096,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,4096,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,3584,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,3584,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,4096,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,3584,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,3072,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,3072,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,3072,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,2560,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,2560,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,2048,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,2560,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,65536,0.3139840066432953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,2048,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,1536,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,2048,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,1536,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,1536,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,1024,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,1024,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,1024,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,768,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,768,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,512,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,512,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,256,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,256,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,128,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,64,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,6144,32,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,6144,32,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,6144,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,12288,0.09529600292444229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,12288,0.06883200258016586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,16384,0.07360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,16384,0.07235199958086014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,16384,0.06111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,12288,0.06070400029420853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,10240,0.07948800176382065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,10240,0.054496001452207565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,8192,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,8192,0.06623999774456024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,7168,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,8192,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,65536,0.20943999290466309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,7168,0.057472001761198044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,7168,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,65536,0.40511998534202576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,6144,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,6144,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,5120,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,5120,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,6144,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,5120,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,4096,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,4096,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,10240,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,3584,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,3584,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,4096,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,3584,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,3072,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,3072,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,3072,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,65536,0.24748800694942474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,2560,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,2560,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,2560,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,2048,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,2048,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,1536,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,1536,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,1024,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,1024,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,768,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,768,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,512,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,512,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,256,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,5120,32,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,5120,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,5120,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,12288,0.05135999992489815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,12288,0.07603199779987335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,16384,0.06947200000286102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,16384,0.06390400230884552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,16384,0.057312000542879105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,10240,0.06739199906587601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,12288,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,10240,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,10240,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,8192,0.0541439987719059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,8192,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,7168,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,7168,0.040192000567913055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,65536,0.23267200589179993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,6144,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,7168,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,6144,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,65536,0.18252800405025482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,6144,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,5120,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,5120,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,4096,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,5120,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,4096,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,3584,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,65536,0.22390399873256683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,3584,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,3072,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,3072,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,2560,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,2560,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,2048,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,2048,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,1536,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,1536,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,1024,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,768,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,512,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,256,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,128,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,4096,32,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,4096,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,12288,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,16384,0.0634239986538887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,16384,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,4096,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,16384,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,12288,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,10240,0.06387200206518173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,12288,0.04608000069856644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,10240,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,8192,0.05238400027155876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,10240,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,8192,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,65536,0.16441600024700165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,7168,0.04774399846792221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,8192,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,65536,0.20444799959659576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,7168,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,6144,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,5120,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,6144,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,5120,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,6144,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,7168,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,4096,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,4096,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,3584,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,65536,0.20550400018692017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,3584,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,3584,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,3072,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,3072,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,2560,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,2560,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,2048,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,2048,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,1536,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,1536,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,1024,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,1024,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,256,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3584,32,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3584,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3584,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,12288,0.06380800157785416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,12288,0.06790400296449661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,16384,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,16384,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,16384,0.05657599866390228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,10240,0.05344000086188316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,12288,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,10240,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,8192,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,8192,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,10240,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,8192,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,65536,0.1464959979057312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,7168,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,65536,0.14342400431632996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,7168,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,65536,0.20080000162124634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,6144,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,7168,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,6144,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,5120,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,6144,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,5120,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,4096,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,5120,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,4096,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,3584,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,3584,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,3072,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,3072,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,2560,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,2560,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,2560,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,2048,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,2048,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,1536,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,1536,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,1024,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,1024,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,768,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,768,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,256,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,128,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,64,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,3072,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,3072,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,3072,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,12288,0.05897599831223488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,12288,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,16384,0.06230400130152702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,16384,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,12288,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,16384,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,10240,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,10240,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,8192,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,10240,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,8192,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,7168,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,8192,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,65536,0.12943999469280243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,7168,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,6144,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,7168,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,65536,0.1579200029373169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,6144,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,5120,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,6144,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,5120,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,4096,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,5120,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,3584,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,4096,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,65536,0.13238400220870972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,4096,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,3584,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,3072,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,3072,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,2560,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,2560,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,3072,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,2560,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,2048,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,2048,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,1536,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,1536,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,1024,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,1024,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,768,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,768,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2560,32,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2560,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2560,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,12288,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,12288,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,16384,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,16384,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,12288,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,10240,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,10240,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,10240,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,8192,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,8192,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,16384,0.04726399853825569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,8192,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,65536,0.12700800597667694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,7168,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,7168,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,65536,0.14655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,6144,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,5120,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,6144,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,5120,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,4096,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,65536,0.12064000219106674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,5120,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,4096,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,3584,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,3072,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,3072,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,2560,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,2048,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,2048,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,1536,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,1536,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,2048,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,1024,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,64,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,2048,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,2048,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,2048,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,12288,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,12288,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,16384,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,16384,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,16384,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,12288,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,10240,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,10240,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,8192,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,10240,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,8192,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,7168,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,65536,0.09312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,8192,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,65536,0.10172799974679947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,7168,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,7168,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,6144,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,6144,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,5120,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,5120,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,6144,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,5120,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,4096,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,4096,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,65536,0.1178240031003952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,4096,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,3584,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,3072,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,3072,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,2048,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,1536,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,2048,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,1024,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,768,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,2560,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1536,32,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1536,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1536,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,12288,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,12288,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,16384,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,16384,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,16384,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,12288,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,10240,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,10240,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,10240,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,8192,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,65536,0.07923199981451035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,8192,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,65536,0.11699199676513672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,65536,0.0830719992518425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,7168,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,6144,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,7168,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,7168,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,5120,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,6144,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,5120,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,4096,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,3584,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,3072,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,2560,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,2048,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,1536,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,1024,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,1024,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,1024,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,12288,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,16384,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,16384,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,16384,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,12288,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,10240,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,10240,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,10240,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,65536,0.07407999783754349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,8192,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,12288,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,8192,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,65536,0.06864000111818314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,7168,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,7168,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,6144,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,6144,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,5120,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,6144,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,65536,0.11711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,5120,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,4096,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,3072,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,2560,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,2560,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,2048,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,1024,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,1024,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,768,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,768,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,768,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,768,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,12288,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,12288,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,16384,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,16384,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,12288,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,16384,0.05430399999022484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,10240,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,10240,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,8192,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,8192,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,10240,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,8192,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,7168,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,65536,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,7168,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,65536,0.06627199798822403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,7168,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,6144,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,6144,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,5120,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,5120,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,6144,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,4096,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,65536,0.11660800129175186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,4096,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,3584,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,3072,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,2560,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,1536,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,1024,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,512,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,512,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,512,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,12288,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,16384,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,16384,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,12288,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,16384,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,12288,0.04435199871659279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,10240,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,10240,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,8192,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,8192,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,65536,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,65536,0.0469760000705719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,8192,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,7168,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,7168,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,7168,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,6144,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,5120,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,6144,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,5120,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,4096,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,65536,0.11599999666213989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,4096,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,3584,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,3584,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,3072,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,3072,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,2048,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,256,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,256,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,256,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,12288,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,12288,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,16384,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,16384,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,16384,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,12288,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,10240,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,10240,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,10240,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,8192,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,8192,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,7168,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,8192,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,65536,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,65536,0.04732799902558327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,6144,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,6144,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,7168,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,5120,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,65536,0.11615999788045883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,4096,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,4096,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,4096,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,3584,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,3072,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,5120,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,2048,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,1536,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,1024,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,128,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,128,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,128,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,12288,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,12288,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,16384,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,16384,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,16384,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,12288,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,10240,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,10240,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,10240,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,8192,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,8192,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,8192,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,65536,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,7168,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,7168,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,65536,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,7168,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,6144,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,5120,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,6144,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,5120,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,6144,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,65536,0.11648000031709671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,4096,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,3072,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,3584,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,2048,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,2560,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,2048,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,1536,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,1024,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,64,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,64,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,12288,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,16384,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,16384,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,16384,0.052352000027894974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,12288,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,64,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,12288,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,10240,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,10240,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,65536,0.04668800160288811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,8192,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,8192,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,65536,0.04614400118589401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,10240,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,8192,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,7168,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,7168,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,6144,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,6144,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,7168,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,6144,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,5120,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,65536,0.11107199639081955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,4096,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,3584,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,4096,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,3584,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,3072,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,1536,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,768,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,512,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,192,32,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,192,32,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,192,32,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,10240,0.5034880042076111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,12288,0.6854079961776733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,10240,0.44377601146698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,12288,0.6286720037460327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,16384,0.7433279752731323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,8192,0.42374399304389954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,8192,0.4315840005874634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,16384,0.8235840201377869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,12288,0.4095360040664673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,7168,0.38790398836135864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,6144,0.3054080009460449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,10240,0.3439359962940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,16384,0.5350080132484436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,7168,0.37827199697494507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,8192,0.2803199887275696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,7168,0.25331199169158936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,6144,0.33395200967788696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,5120,0.28575998544692993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,5120,0.2746880054473877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,4096,0.20547200739383698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,6144,0.21852800250053406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,4096,0.2080959975719452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,3584,0.19382399320602417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,3584,0.18627199530601501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,3072,0.1634880006313324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,5120,0.18681600689888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,2560,0.13369600474834442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,3072,0.28911998867988586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,4096,0.15308800339698792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,3584,0.13808000087738037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,2560,0.14319999516010284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,2048,0.11247999966144562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,1536,0.08883199840784073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,2048,0.19392000138759613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,1536,0.15027199685573578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,2560,0.10639999806880951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,2048,0.08867199718952179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,1024,0.09251199662685394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,1024,0.0629120022058487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,1536,0.07414399832487106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,768,0.0735040009021759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,1024,0.05686400085687637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,768,0.07612799853086472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,512,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,768,0.0496320016682148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,512,0.05283199995756149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,3072,0.12172800302505493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,512,0.041152000427246094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,256,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,256,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,128,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,128,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,128,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,256,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,64,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,64,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,65536,32,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,64,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,65536,32,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,65536,32,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,12288,0.15724800527095795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,12288,0.14777599275112152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,16384,0.20508800446987152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,16384,0.1382399946451187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,16384,0.19475199282169342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,12288,0.11187200248241425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,10240,0.1334719955921173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,10240,0.2279680073261261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,10240,0.09232000261545181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,8192,0.09916800260543823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,8192,0.10313600301742554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,8192,0.07577600330114365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,65536,0.8199999928474426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,7168,0.09427200257778168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,7168,0.09529600292444229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,6144,0.077504001557827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,5120,0.07756800204515457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,6144,0.060127999633550644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,6144,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,5120,0.06585600227117538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,7168,0.06726399809122086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,65536,1.476639986038208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,4096,0.0926079973578453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,4096,0.0594559982419014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,4096,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,5120,0.05257600173354149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,3584,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,3584,0.05548800155520439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,3584,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,3072,0.041600000113248825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,3072,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,3072,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,2560,0.061344001442193985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,2560,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,2048,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,2560,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,2048,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,1536,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,1536,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,2048,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,1536,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,1024,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,1024,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,1024,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,768,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,768,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,512,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,768,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,512,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,512,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,256,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,65536,0.5223360061645508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,128,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,128,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,256,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,256,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,128,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,64,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,64,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,64,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,16384,32,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,16384,32,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,16384,32,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,12288,0.15043200552463531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,16384,0.19120000302791595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,16384,0.18547199666500092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,12288,0.10732799768447876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,16384,0.13862399756908417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,10240,0.1263040006160736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,10240,0.12831999361515045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,12288,0.12348800152540207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,8192,0.10883200168609619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,65536,0.6120960116386414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,10240,0.09100800007581711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,8192,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,7168,0.09279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,7168,0.13635200262069702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,8192,0.07420799881219864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,6144,0.07942400127649307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,6144,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,65536,0.6658560037612915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,7168,0.06748799979686737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,5120,0.09004800021648407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,4096,0.04851200059056282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,5120,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,6144,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,5120,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,4096,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,3584,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,3584,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,3072,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,4096,0.04211200028657913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,3584,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,3072,0.06790400296449661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,3072,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,2560,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,2048,0.048128001391887665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,2560,0.05331199988722801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,2560,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,2048,0.044704001396894455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,65536,0.5237759947776794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,2048,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,1536,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,1536,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,1024,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,1024,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,1536,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,768,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,1024,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,768,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,768,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,512,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,512,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,256,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,256,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,512,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,256,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,128,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,128,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,64,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,64,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,12288,32,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,12288,32,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,12288,32,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,12288,0.1241919994354248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,12288,0.13247999548912048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,16384,0.1629759967327118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,16384,0.1364160031080246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,12288,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,16384,0.16527999937534332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,10240,0.1143679991364479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,10240,0.11459200084209442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,10240,0.08985599875450134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,8192,0.09279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,8192,0.09491200000047684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,7168,0.08396799862384796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,8192,0.06921599805355072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,7168,0.08515200018882751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,65536,0.6122559905052185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,6144,0.07599999755620956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,6144,0.07734400033950806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,5120,0.06265600025653839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,7168,0.0623680017888546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,6144,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,5120,0.08566399663686752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,65536,0.5605120062828064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,4096,0.0764480009675026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,4096,0.05532800033688545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,5120,0.048608001321554184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,3584,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,3584,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,4096,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,3072,0.056352000683546066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,3072,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,3072,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,3584,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,2560,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,2560,0.04931199923157692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,2560,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,2048,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,2048,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,1536,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,2048,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,1536,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,1536,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,1024,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,65536,0.5218560099601746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,1024,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,768,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,768,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,1024,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,512,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,512,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,256,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,512,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,256,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,128,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,256,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,128,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,64,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,64,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,10240,32,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,32,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,10240,32,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,10240,768,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,12288,0.09670399874448776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,16384,0.12012799829244614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,12288,0.10035199671983719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,16384,0.17641599476337433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,16384,0.10860799998044968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,12288,0.08764799684286118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,10240,0.07388799637556076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,10240,0.11859200149774551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,10240,0.07289600372314453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,8192,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,65536,0.4790399968624115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,7168,0.0724480003118515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,8192,0.08614400029182434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,8192,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,7168,0.09465599805116653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,65536,0.46988800168037415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,65536,0.4339520037174225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,7168,0.04899200052022934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,6144,0.06326399743556976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,6144,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,5120,0.05875200033187866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,6144,0.047231998294591904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,5120,0.07148800045251846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,4096,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,4096,0.04915200173854828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,3584,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,5120,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,3584,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,4096,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,3072,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,3584,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,3072,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,2560,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,2560,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,3072,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,2048,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,2048,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,2560,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,2048,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,1536,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,1024,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,1536,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,1024,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,1536,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,768,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,1024,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,768,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,768,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,512,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,256,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,512,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,256,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,128,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,64,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,8192,32,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,8192,32,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,8192,32,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,12288,0.09603200107812881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,16384,0.10150399804115295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,12288,0.12863999605178833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,16384,0.08790399879217148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,16384,0.09731200337409973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,10240,0.07891199737787247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,12288,0.07891199737787247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,10240,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,10240,0.11507199704647064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,8192,0.10547199845314026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,8192,0.08403199911117554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,65536,0.28044798970222473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,7168,0.0867839977145195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,8192,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,65536,0.28070399165153503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,6144,0.07078400254249573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,7168,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,6144,0.07228799909353256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,5120,0.06191999837756157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,6144,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,5120,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,4096,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,5120,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,4096,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,3584,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,4096,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,3584,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,7168,0.09478399902582169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,3584,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,3072,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,3072,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,65536,0.41628798842430115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,2560,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,2560,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,3072,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,2560,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,2048,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,2048,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,1536,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,2048,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,1536,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,1536,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,1024,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,1024,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,512,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,768,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,768,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,768,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,512,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,256,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,7168,32,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,7168,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,7168,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,12288,0.06703999638557434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,12288,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,16384,0.1050880029797554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,16384,0.0793600007891655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,16384,0.08025600016117096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,10240,0.08524800091981888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,12288,0.06576000154018402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,10240,0.06019200012087822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,8192,0.08867199718952179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,10240,0.054207999259233475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,8192,0.08342400193214417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,65536,0.2455040067434311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,8192,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,7168,0.07440000027418137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,7168,0.07756800204515457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,65536,0.30902400612831116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,65536,0.24166400730609894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,6144,0.07334399968385696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,6144,0.05587200075387955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,5120,0.052319999784231186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,7168,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,6144,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,5120,0.05881600081920624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,5120,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,4096,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,4096,0.04864000156521797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,3584,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,3584,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,4096,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,3584,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,3072,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,3072,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,3072,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,2560,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,2560,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,2048,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,2048,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,2560,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,1536,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,2048,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,1536,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,1024,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,768,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,1024,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,1536,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,1024,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,768,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,768,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,512,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,512,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,512,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,256,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,64,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,6144,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,6144,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,12288,0.06419199705123901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,16384,0.11952000111341476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,16384,0.07366400212049484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,12288,0.06915199756622314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,6144,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,16384,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,12288,0.0514880008995533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,10240,0.05423999950289726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,10240,0.05523199960589409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,8192,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,10240,0.04745600000023842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,8192,0.05196800082921982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,65536,0.4440320134162903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,7168,0.07196799665689468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,7168,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,8192,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,65536,0.21193599700927734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,6144,0.0655680000782013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,6144,0.06435199826955795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,5120,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,7168,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,5120,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,6144,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,4096,0.04531199857592583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,4096,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,4096,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,5120,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,3584,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,3584,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,3584,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,65536,0.2444480061531067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,3072,0.03743999823927879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,3072,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,2560,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,2560,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,2048,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,2048,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,1536,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,2048,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,1536,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,1024,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,768,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,1024,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,768,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,512,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,512,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,256,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,256,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,128,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,5120,32,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,5120,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,5120,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,12288,0.09795200079679489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,16384,0.06668800115585327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,12288,0.06083200126886368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,16384,0.12547199428081512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,16384,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,12288,0.046911999583244324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,10240,0.06931199878454208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,10240,0.0514880008995533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,8192,0.05564799904823303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,10240,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,7168,0.05516799911856651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,8192,0.06038400158286095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,7168,0.05484800040721893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,8192,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,65536,0.3782080113887787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,65536,0.252703994512558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,7168,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,6144,0.04755200073122978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,6144,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,5120,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,5120,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,4096,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,4096,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,3584,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,3584,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,3072,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,65536,0.2279359996318817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,2560,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,2560,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,2048,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,2048,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,2048,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,1536,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,1536,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,1024,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,1024,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,768,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,768,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,512,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,3072,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,4096,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,4096,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,4096,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,12288,0.08895999938249588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,12288,0.05318399891257286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,16384,0.07308799773454666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,16384,0.06857600063085556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,12288,0.04566400125622749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,16384,0.056384000927209854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,10240,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,10240,0.04940799996256828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,8192,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,8192,0.056223999708890915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,10240,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,8192,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,7168,0.05222399905323982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,65536,0.16812799870967865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,7168,0.05084799975156784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,65536,0.1616320013999939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,7168,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,6144,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,6144,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,5120,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,5120,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,6144,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,4096,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,4096,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,3584,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,3584,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,65536,0.20499199628829956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,3584,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,3072,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,3072,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,2560,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,2560,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,2048,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,2048,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,1536,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,1536,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,1024,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,1024,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,768,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,768,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,512,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,256,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,128,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,64,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3584,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3584,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3584,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,12288,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,16384,0.06412799656391144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,16384,0.05455999821424484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,16384,0.05641600117087364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,12288,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,10240,0.05488000065088272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,10240,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,12288,0.06774400174617767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,8192,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,8192,0.048767998814582825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,65536,0.14591999351978302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,65536,0.18988800048828125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,8192,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,7168,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,6144,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,7168,0.04992000013589859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,6144,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,7168,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,5120,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,6144,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,5120,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,4096,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,4096,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,5120,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,3584,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,65536,0.20127999782562256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,3584,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,3072,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,3072,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,2560,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,2560,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,2048,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,2560,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,2048,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,1536,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,1536,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,1024,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,1024,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,768,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,768,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,768,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,256,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,3072,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,3072,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,3072,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,12288,0.05536000058054924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,16384,0.055456001311540604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,16384,0.062272001057863235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,12288,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,16384,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,10240,0.052639998495578766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,10240,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,12288,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,10240,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,8192,0.05020799860358238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,8192,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,7168,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,8192,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,7168,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,65536,0.13209599256515503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,7168,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,65536,0.1658560037612915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,6144,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,6144,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,5120,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,6144,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,5120,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,4096,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,4096,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,3584,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,3584,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,5120,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,65536,0.1318719983100891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,3584,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,3072,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,2560,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,3072,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,2560,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,2560,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,2048,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,1536,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,1024,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,1536,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,768,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,768,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,512,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2560,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2560,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2560,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,12288,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,12288,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,16384,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,16384,0.04819199815392494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,16384,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,12288,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,10240,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,10240,0.033504001796245575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,10240,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,8192,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,8192,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,8192,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,7168,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,65536,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,65536,0.13990400731563568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,7168,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,6144,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,6144,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,6144,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,5120,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,5120,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,7168,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,5120,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,4096,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,4096,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,4096,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,3584,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,65536,0.11974400281906128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,3072,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,3072,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,3072,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,2560,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,2048,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,2560,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,1536,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,1536,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,2048,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,1024,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,1024,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,768,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,256,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,256,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,2048,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,2048,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,2048,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,12288,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,12288,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,16384,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,16384,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,16384,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,12288,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,10240,0.03680000081658363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,10240,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,8192,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,10240,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,7168,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,8192,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,65536,0.0936959981918335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,65536,0.10969600081443787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,65536,0.11686400324106216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,7168,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,7168,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,6144,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,5120,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,6144,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,5120,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,5120,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,4096,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,8192,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,4096,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,3584,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,3584,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,3072,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,3072,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,2560,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,2560,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,1536,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,1536,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,512,0.1607999950647354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1536,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1536,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1536,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,12288,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,12288,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,16384,0.04287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,16384,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,16384,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,12288,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,10240,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,10240,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,8192,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,8192,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,65536,0.07548800110816956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,7168,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,7168,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,65536,0.08086399734020233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,6144,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,6144,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,6144,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,5120,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,5120,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,4096,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,65536,0.11680000275373459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,4096,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,3584,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,3072,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,2560,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,2048,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,2048,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,1536,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,768,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,1024,32,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,1024,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,12288,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,16384,0.04201599955558777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,16384,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,16384,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,12288,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,1024,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,12288,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,10240,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,10240,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,8192,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,10240,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,65536,0.06787200272083282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,7168,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,8192,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,65536,0.0692799985408783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,7168,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,6144,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,6144,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,5120,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,65536,0.11744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,4096,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,4096,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,3584,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,2560,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,1024,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,768,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,768,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,768,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,12288,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,12288,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,16384,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,16384,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,16384,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,12288,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,10240,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,10240,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,10240,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,8192,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,8192,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,8192,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,65536,0.05619199946522713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,7168,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,65536,0.058111999183893204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,7168,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,6144,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,7168,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,6144,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,65536,0.11683200299739838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,5120,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,4096,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,3584,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,4096,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,3584,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,3072,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,2560,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,2048,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,512,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,512,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,512,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,12288,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,12288,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,16384,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,16384,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,16384,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,12288,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,10240,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,10240,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,8192,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,8192,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,65536,0.05580800026655197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,65536,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,7168,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,6144,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,8192,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,7168,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,7168,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,6144,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,6144,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,5120,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,5120,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,65536,0.11555200070142746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,4096,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,4096,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,3072,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,3584,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,2048,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,1024,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,256,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,256,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,256,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,12288,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,12288,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,16384,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,16384,0.054207999259233475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,12288,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,10240,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,8192,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,8192,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,65536,0.052191998809576035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,8192,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,16384,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,65536,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,7168,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,6144,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,7168,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,6144,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,7168,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,5120,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,6144,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,65536,0.11648000031709671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,5120,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,3584,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,4096,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,3584,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,3072,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,2048,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,1536,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,128,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,128,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,64,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,128,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,12288,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,12288,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,16384,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,16384,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,12288,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,10240,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,10240,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,8192,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,8192,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,8192,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,7168,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,65536,0.050624001771211624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,65536,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,65536,0.14195199310779572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,7168,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,7168,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,6144,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,6144,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,5120,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,6144,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,5120,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,4096,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,3072,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,2560,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,2048,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,1536,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,1024,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,768,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,64,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,64,32,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,64,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,12288,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,12288,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,16384,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,16384,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,16384,0.05222399905323982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,10240,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,12288,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,10240,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,10240,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,8192,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,8192,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,7168,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,8192,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,7168,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,65536,0.05145600065588951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,65536,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,7168,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,6144,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,5120,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,6144,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,65536,0.19411200284957886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,5120,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,3584,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,3072,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,2560,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,2048,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,1024,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,256,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,160,32,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,160,32,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,160,32,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,12288,0.31590399146080017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,10240,0.26761600375175476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,12288,0.2964479923248291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,16384,0.41523200273513794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,12288,0.30953601002693176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,16384,0.393312007188797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,8192,0.21721599996089935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,10240,0.24771200120449066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,16384,0.40751999616622925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,8192,0.20233599841594696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,7168,0.1955839991569519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,10240,0.2595199942588806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,7168,0.18000000715255737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,8192,0.21526400744915009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,6144,0.15452800691127777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,7168,0.191103994846344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,4096,0.10793600231409073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,6144,0.16806399822235107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,5120,0.13167999684810638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,4096,0.11990399658679962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,5120,0.1451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,6144,0.1629440039396286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,5120,0.14246399700641632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,3584,0.09603200107812881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,3584,0.10582400113344193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,3072,0.0926079973578453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,4096,0.11548800021409988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,3584,0.10406400263309479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,2560,0.07260800153017044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,3072,0.08483199775218964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,2560,0.07942400127649307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,3072,0.09424000233411789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,2048,0.06163199990987778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,2048,0.06531199812889099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,1536,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,2560,0.08131200075149536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,1536,0.05238400027155876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,2048,0.06774400174617767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,1024,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,1536,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,1024,0.03776000067591667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,768,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,1024,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,768,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,512,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,256,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,768,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,512,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,256,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,512,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,128,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,256,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,128,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,64,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,64,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,128,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,65536,32,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,64,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,65536,32,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,65536,32,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,12288,0.08092799782752991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,12288,0.0870399996638298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,16384,0.11302399635314941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,16384,0.10553599894046783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,16384,0.1202239990234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,10240,0.07427199929952621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,12288,0.09513600170612335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,10240,0.0846719965338707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,8192,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,8192,0.06159999966621399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,10240,0.08124800026416779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,7168,0.0560000017285347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,8192,0.06524799764156342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,65536,0.38447999954223633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,7168,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,6144,0.050624001771211624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,6144,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,7168,0.062431998550891876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,5120,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,5120,0.043327998369932175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,6144,0.05382400006055832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,65536,0.3959360122680664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,4096,0.03596799820661545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,4096,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,5120,0.047168001532554626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,3584,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,4096,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,3584,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,3072,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,3072,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,3584,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,2560,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,3072,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,2560,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,2048,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,2560,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,2048,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,1536,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,2048,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,1536,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,1536,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,1024,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,1024,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,768,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,768,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,1024,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,65536,0.46428799629211426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,768,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,512,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,512,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,64,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,16384,32,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,16384,32,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,32,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,16384,256,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,12288,0.08089599758386612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,16384,0.10476800054311752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,16384,0.10291200131177902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,12288,0.08070400357246399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,16384,0.09353599697351456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,10240,0.06992000341415405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,12288,0.0748480036854744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,10240,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,8192,0.05820799991488457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,8192,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,10240,0.06351999938488007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,65536,0.3866879940032959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,7168,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,8192,0.051552001386880875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,7168,0.050335999578237534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,6144,0.05455999821424484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,6144,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,65536,0.3740159869194031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,7168,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,5120,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,6144,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,5120,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,4096,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,4096,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,5120,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,4096,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,3584,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,3584,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,3584,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,3072,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,3072,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,2560,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,2560,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,3072,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,65536,0.3540799915790558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,2560,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,2048,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,2048,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,2048,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,1536,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,1536,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,1536,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,1024,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,1024,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,768,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,512,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,768,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,1024,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,768,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,512,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,512,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,256,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,12288,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,12288,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,12288,32,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,12288,0.08124800026416779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,16384,0.10335999727249146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,12288,0.08064000308513641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,16384,0.10236799716949463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,16384,0.08873599767684937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,12288,0.06911999732255936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,10240,0.069023996591568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,10240,0.06937599927186966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,10240,0.059776000678539276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,8192,0.05740800127387047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,8192,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,65536,0.38201600313186646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,7168,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,8192,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,7168,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,7168,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,6144,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,65536,0.37408000230789185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,6144,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,6144,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,5120,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,4096,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,4096,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,5120,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,4096,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,3584,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,3584,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,3584,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,3072,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,3072,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,2560,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,3072,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,2560,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,65536,0.32633599638938904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,5120,0.046271998435258865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,2048,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,2048,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,2560,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,2048,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,1536,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,1536,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,1536,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,1024,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,768,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,768,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,1024,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,512,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,512,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,768,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,512,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,256,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,64,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,10240,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,10240,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,10240,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,12288,0.056095998734235764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,16384,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,12288,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,16384,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,16384,0.06249599903821945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,12288,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,10240,0.047359999269247055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,10240,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,10240,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,8192,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,8192,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,7168,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,65536,0.19843199849128723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,8192,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,7168,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,7168,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,65536,0.24611200392246246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,6144,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,6144,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,5120,0.03936000168323517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,5120,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,6144,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,4096,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,4096,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,4096,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,5120,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,3584,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,3584,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,3584,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,65536,0.24428799748420715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,3072,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,3072,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,2560,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,3072,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,2560,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,2048,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,1536,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,2048,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,1536,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,2048,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,1536,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,768,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,512,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,512,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,64,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,8192,32,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,8192,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,8192,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,12288,0.069023996591568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,12288,0.0525440014898777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,16384,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,16384,0.05801599845290184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,12288,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,10240,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,10240,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,16384,0.06550399959087372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,8192,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,8192,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,65536,0.17772799730300903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,10240,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,8192,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,65536,0.22483199834823608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,7168,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,7168,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,6144,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,6144,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,7168,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,5120,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,6144,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,5120,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,4096,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,5120,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,4096,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,4096,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,3584,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,3584,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,65536,0.22627200186252594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,3072,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,3072,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,2560,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,2560,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,2048,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,2048,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,1536,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,2048,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,1024,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,1024,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,768,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,7168,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,7168,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,7168,32,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,12288,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,12288,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,16384,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,16384,0.05753599852323532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,16384,0.05718399956822395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,10240,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,12288,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,10240,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,8192,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,10240,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,8192,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,7168,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,65536,0.1555200070142746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,7168,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,6144,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,6144,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,65536,0.19382399320602417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,7168,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,5120,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,5120,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,4096,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,4096,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,5120,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,3584,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,3584,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,65536,0.20582400262355804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,3072,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,3072,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,2560,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,2560,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,1536,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,2048,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,2048,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,1536,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,1024,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,1024,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,512,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,2048,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,6144,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,6144,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,6144,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,12288,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,12288,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,16384,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,16384,0.04851200059056282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,16384,0.05696000158786774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,10240,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,12288,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,10240,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,10240,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,8192,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,7168,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,8192,0.03654399886727333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,8192,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,65536,0.13814400136470795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,7168,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,6144,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,65536,0.18384000658988953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,6144,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,7168,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,5120,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,5120,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,6144,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,4096,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,5120,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,4096,0.03292800113558769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,4096,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,3584,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,3584,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,65536,0.20496000349521637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,3072,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,3072,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,2560,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,2560,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,2048,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,1024,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,1536,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,768,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,1024,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,768,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,5120,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,5120,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,5120,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,16384,0.05363199859857559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,12288,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,16384,0.04694399982690811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,16384,0.040863998234272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,12288,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,10240,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,10240,0.04057599976658821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,10240,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,8192,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,12288,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,8192,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,8192,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,65536,0.14998400211334229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,7168,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,65536,0.1133119985461235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,7168,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,6144,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,6144,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,6144,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,5120,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,5120,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,5120,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,65536,0.1353600025177002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,3584,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,4096,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,3584,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,3072,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,3072,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,2560,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,1536,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,4096,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,4096,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,4096,32,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,12288,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,12288,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,16384,0.04601600021123886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,16384,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,16384,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,12288,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,10240,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,10240,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,10240,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,8192,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,8192,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,8192,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,7168,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,65536,0.10441599786281586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,7168,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,65536,0.13676799833774567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,7168,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,6144,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,6144,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,5120,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,5120,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,6144,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,5120,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,4096,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,4096,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,65536,0.1321599930524826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,3584,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,3072,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,3072,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,2560,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,2048,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,32,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3584,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3584,32,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3584,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,12288,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,16384,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,16384,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,16384,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,12288,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,12288,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,10240,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,10240,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,8192,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,7168,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,8192,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,10240,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,8192,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,65536,0.09353599697351456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,65536,0.10940799862146378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,7168,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,6144,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,6144,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,7168,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,5120,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,5120,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,6144,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,4096,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,5120,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,3584,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,4096,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,65536,0.12639999389648438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,3584,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,3072,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,3072,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,3072,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,2560,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,2048,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,1536,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,1024,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,64,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,3072,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,3072,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,3072,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,12288,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,12288,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,16384,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,16384,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,16384,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,12288,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,10240,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,10240,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,8192,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,10240,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,8192,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,7168,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,7168,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,65536,0.08585599809885025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,65536,0.10812799632549286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,6144,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,6144,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,7168,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,5120,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,5120,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,4096,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,4096,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,4096,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,3584,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,65536,0.1225920021533966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,3584,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,3072,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,2560,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,6144,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,2048,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,2048,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,768,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,512,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2560,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2560,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2560,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,12288,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,12288,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,16384,0.0424639992415905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,16384,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,16384,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,12288,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,10240,0.037151999771595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,10240,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,10240,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,8192,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,8192,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,7168,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,65536,0.09372799843549728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,8192,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,65536,0.09647999703884125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,7168,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,6144,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,7168,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,5120,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,6144,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,6144,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,5120,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,4096,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,5120,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,4096,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,65536,0.12003199756145477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,3584,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,3072,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,2560,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,2048,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,1536,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,64,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,2048,32,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,2048,32,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,12288,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,16384,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,16384,0.03452799841761589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,2048,32,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,16384,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,12288,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,12288,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,10240,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,10240,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,8192,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,65536,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,10240,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,65536,0.07667200267314911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,8192,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,7168,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,7168,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,6144,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,7168,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,6144,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,6144,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,5120,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,65536,0.11814399808645248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,5120,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,3584,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,3072,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,3072,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,2048,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,1536,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,1536,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,64,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,64,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1536,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1536,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1536,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,12288,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,12288,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,16384,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,16384,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,16384,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,12288,0.04416000097990036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,10240,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,10240,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,10240,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,8192,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,8192,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,7168,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,8192,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,7168,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,65536,0.0629120022058487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,65536,0.07843200117349625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,6144,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,7168,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,6144,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,5120,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,5120,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,6144,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,4096,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,5120,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,65536,0.11779200285673141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,3072,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,3584,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,2560,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,3072,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,3072,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,2048,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,1536,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,512,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,1024,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,1024,32,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,1024,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,12288,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,12288,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,16384,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,16384,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,16384,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,12288,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,10240,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,8192,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,8192,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,65536,0.05552000179886818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,65536,0.05375999957323074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,8192,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,7168,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,6144,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,7168,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,5120,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,6144,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,5120,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,65536,0.11868800222873688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,4096,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,4096,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,3584,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,2560,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,3072,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,2048,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,2560,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,2048,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,64,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,768,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,768,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,768,32,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,12288,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,16384,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,16384,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,16384,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,12288,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,10240,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,10240,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,12288,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,8192,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,10240,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,65536,0.05097600072622299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,65536,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,8192,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,7168,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,8192,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,7168,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,7168,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,6144,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,5120,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,6144,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,5120,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,65536,0.11747200042009354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,4096,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,5120,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,4096,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,4096,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,3584,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,3584,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,3072,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,2560,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,3072,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,2048,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,1536,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,768,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,768,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,512,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,512,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,512,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,12288,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,12288,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,16384,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,16384,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,16384,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,12288,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,10240,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,8192,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,8192,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,7168,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,65536,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,8192,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,65536,0.045024000108242035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,7168,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,6144,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,7168,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,6144,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,6144,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,5120,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,65536,0.11903999745845795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,4096,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,2560,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,2560,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,2048,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,1536,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,1024,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,256,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,256,32,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,256,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,12288,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,16384,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,16384,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,12288,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,16384,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,12288,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,10240,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,10240,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,8192,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,10240,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,8192,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,65536,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,8192,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,7168,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,65536,0.040383998304605484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,6144,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,7168,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,6144,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,5120,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,6144,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,4096,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,65536,0.11817599833011627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,5120,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,4096,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,2560,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,2560,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,2048,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,1536,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,2048,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,128,32,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,128,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,128,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,12288,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,12288,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,16384,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,16384,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,12288,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,10240,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,10240,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,10240,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,8192,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,8192,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,65536,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,65536,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,7168,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,8192,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,7168,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,6144,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,6144,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,6144,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,65536,0.11695999652147293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,5120,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,4096,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,4096,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,4096,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,3584,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,7168,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,3584,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,3072,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,3072,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,2560,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,2048,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,1536,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,1536,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,64,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,64,32,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,64,32,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,12288,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,16384,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,16384,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,16384,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,12288,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,10240,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,10240,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,8192,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,10240,0.03699199855327606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,8192,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,65536,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,65536,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,8192,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,7168,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,7168,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,6144,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,7168,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,6144,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,5120,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,5120,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,6144,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,65536,0.18559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,4096,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,4096,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,4096,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,3584,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,3072,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,2560,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,2048,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,768,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,1024,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,512,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,128,32,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,128,32,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,12288,0.3049600124359131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,128,32,64,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,16384,0.39612799882888794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,12288,0.3627200126647949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,12288,0.3115200102329254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,10240,0.2759360074996948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,8192,0.20694400370121002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,10240,0.26080000400543213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,7168,0.21379199624061584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,10240,0.2622720003128052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,7168,0.1789119988679886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,16384,0.4085119962692261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,8192,0.21964800357818604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,16384,0.4566720128059387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,8192,0.2141759991645813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,7168,0.18831999599933624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,6144,0.15513600409030914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,6144,0.19164800643920898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,5120,0.13286399841308594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,4096,0.10771200060844421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,5120,0.1536639928817749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,4096,0.12649600207805634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,6144,0.1656319946050644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,3584,0.09788800030946732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,5120,0.13939200341701508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,4096,0.1125440001487732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,3584,0.1162559986114502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,3072,0.08604799956083298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,2560,0.07452800124883652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,3072,0.09955199807882309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,2560,0.08383999764919281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,3584,0.10345599800348282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,2048,0.07171200215816498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,3072,0.09193599969148636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,2048,0.06272000074386597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,2560,0.07929600030183792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,1536,0.05177599936723709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,1536,0.05603199824690819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,1024,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,2048,0.06649599969387054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,1536,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,1024,0.04374400153756142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,768,0.03766399994492531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,1024,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,768,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,512,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,512,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,768,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,256,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,512,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,256,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,128,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,256,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,128,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,64,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,128,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,64,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,64,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,65536,32,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,65536,32,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,65536,32,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,12288,0.12044800072908401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,16384,0.10441599786281586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,12288,0.09763199836015701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,16384,0.1234240010380745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,16384,0.11535999923944473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,12288,0.09583999961614609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,10240,0.07779199630022049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,10240,0.10540799796581268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,10240,0.08371199667453766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,8192,0.06230400130152702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,8192,0.08025600016117096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,65536,0.405023992061615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,7168,0.059808000922203064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,8192,0.06185600161552429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,7168,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,6144,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,6144,0.0655359998345375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,7168,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,5120,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,5120,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,6144,0.05215999856591225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,4096,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,4096,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,65536,0.47097599506378174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,5120,0.046112000942230225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,4096,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,3584,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,3584,0.04339199885725975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,3072,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,3584,0.03471999987959862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,3072,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,2560,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,2560,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,2560,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,2048,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,2048,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,2048,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,1536,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,1536,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,65536,0.5142080187797546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,1024,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,1024,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,1536,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,1024,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,768,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,768,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,3072,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,512,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,768,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,512,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,512,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,256,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,256,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,256,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,64,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,64,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,16384,32,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,16384,32,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,16384,32,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,12288,0.09411200135946274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,12288,0.08003199845552444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,16384,0.10364799946546555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,16384,0.11420799791812897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,16384,0.09279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,10240,0.07347200065851212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,10240,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,12288,0.07583999633789062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,10240,0.06601600348949432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,8192,0.060447998344898224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,8192,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,8192,0.049984000623226166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,65536,0.38675200939178467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,7168,0.05209600180387497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,6144,0.0488319993019104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,7168,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,6144,0.05923200026154518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,65536,0.4604479968547821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,7168,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,5120,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,6144,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,65536,0.3399359881877899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,5120,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,4096,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,4096,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,5120,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,3584,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,3584,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,4096,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,3072,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,3584,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,3072,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,2560,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,2560,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,3072,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,2048,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,2560,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,2048,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,2048,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,1536,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,1536,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,1024,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,1024,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,1536,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,1024,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,768,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,768,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,512,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,512,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,256,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,256,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,128,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,128,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,64,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,12288,32,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,12288,32,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,12288,32,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,16384,0.1043199971318245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,16384,0.10294400155544281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,12288,0.07843200117349625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,16384,0.08508799970149994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,10240,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,12288,0.07094399631023407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,10240,0.06758400052785873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,12288,0.07926400005817413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,65536,0.38304001092910767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,10240,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,8192,0.05648000165820122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,8192,0.05580800026655197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,8192,0.04729599878191948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,7168,0.050464000552892685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,7168,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,65536,0.3822399973869324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,6144,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,6144,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,5120,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,7168,0.044415999203920364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,6144,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,5120,0.03916800022125244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,4096,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,3584,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,4096,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,5120,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,3584,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,4096,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,3584,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,65536,0.3182399868965149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,3072,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,3072,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,2560,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,3072,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,2560,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,2560,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,2048,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,2048,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,1536,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,2048,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,1536,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,1024,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,1024,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,1536,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,768,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,1024,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,768,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,512,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,512,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,256,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,256,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,64,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,10240,32,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,10240,32,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,10240,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,12288,0.06006399914622307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,12288,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,16384,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,16384,0.06470400094985962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,16384,0.0597120001912117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,10240,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,10240,0.06163199990987778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,12288,0.05129599943757057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,10240,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,8192,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,8192,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,7168,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,8192,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,65536,0.22518399357795715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,7168,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,7168,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,6144,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,6144,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,6144,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,65536,0.26131200790405273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,5120,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,5120,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,4096,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,4096,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,5120,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,3584,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,3584,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,4096,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,3584,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,65536,0.23849600553512573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,3072,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,3072,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,2560,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,2560,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,2048,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,2048,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,1536,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,1536,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,1536,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,768,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,1024,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,768,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,512,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,128,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,8192,32,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,8192,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,8192,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,12288,0.06870400160551071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,16384,0.05846399813890457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,12288,0.06784000247716904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,16384,0.06412799656391144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,16384,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,10240,0.043327998369932175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,10240,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,12288,0.049215998500585556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,8192,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,10240,0.042367998510599136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,7168,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,8192,0.03964800015091896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,65536,0.1942719966173172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,8192,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,7168,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,6144,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,65536,0.21609599888324738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,6144,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,7168,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,5120,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,6144,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,5120,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,4096,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,5120,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,4096,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,3584,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,3584,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,3072,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,65536,0.22627200186252594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,3072,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,2560,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,2560,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,2048,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,2048,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,1536,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,1536,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,1024,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,1024,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,768,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,768,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,256,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,7168,32,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,7168,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,7168,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,12288,0.049984000623226166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,12288,0.06838399916887283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,16384,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,16384,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,16384,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,10240,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,10240,0.04464000090956688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,10240,0.04179200157523155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,12288,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,8192,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,8192,0.036479998379945755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,65536,0.1576640009880066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,7168,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,8192,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,65536,0.1947840005159378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,7168,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,6144,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,5120,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,6144,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,6144,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,5120,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,4096,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,4096,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,5120,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,4096,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,3584,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,7168,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,65536,0.20307199656963348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,3584,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,3072,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,3072,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,2560,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,2560,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,2048,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,2048,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,1536,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,2048,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,1536,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,1024,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,1024,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,768,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,768,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,768,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,256,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,256,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,6144,32,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,6144,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,6144,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,12288,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,12288,0.04822399839758873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,16384,0.08924800157546997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,16384,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,16384,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,12288,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,10240,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,10240,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,8192,0.035840000957250595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,10240,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,8192,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,7168,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,8192,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,7168,0.04368000105023384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,65536,0.3266240060329437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,65536,0.16764800250530243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,7168,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,6144,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,6144,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,6144,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,5120,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,5120,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,4096,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,4096,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,3584,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,4096,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,3584,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,3072,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,65536,0.20243200659751892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,3072,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,3072,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,2560,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,2560,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,2048,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,2048,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,1536,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,1536,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,1024,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,1024,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,512,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,512,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,5120,32,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,5120,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,12288,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,16384,0.05276799947023392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,16384,0.04848000034689903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,5120,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,12288,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,16384,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,10240,0.037567999213933945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,12288,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,8192,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,10240,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,10240,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,8192,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,65536,0.16921600699424744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,65536,0.1414400041103363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,7168,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,8192,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,7168,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,7168,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,6144,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,6144,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,5120,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,5120,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,6144,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,4096,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,5120,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,4096,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,4096,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,3584,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,3584,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,3072,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,65536,0.13104000687599182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,3072,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,3072,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,2560,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,2560,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,2048,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,2048,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,1536,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,1024,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,768,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,64,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,4096,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,4096,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,4096,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,12288,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,12288,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,16384,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,16384,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,16384,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,12288,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,10240,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,10240,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,8192,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,10240,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,8192,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,7168,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,8192,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,7168,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,6144,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,65536,0.12716799974441528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,7168,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,6144,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,5120,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,65536,0.10815999656915665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,5120,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,4096,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,4096,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,5120,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,3584,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,65536,0.12614400684833527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,4096,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,3584,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,3072,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,2560,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,2560,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,3072,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,2560,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,2048,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,1536,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,2048,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,1536,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,1024,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3584,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3584,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3584,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,12288,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,12288,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,16384,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,16384,0.04364800080657005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,12288,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,16384,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,10240,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,10240,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,10240,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,8192,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,8192,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,7168,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,65536,0.11151999980211258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,7168,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,6144,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,7168,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,65536,0.0976639986038208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,6144,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,5120,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,6144,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,5120,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,5120,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,4096,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,4096,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,65536,0.12294399738311768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,3584,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,3584,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,3584,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,3072,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,3072,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,3072,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,2560,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,2560,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,2048,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,1536,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,1024,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,3072,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,3072,32,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,3072,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,12288,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,12288,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,16384,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,16384,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,12288,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,10240,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,10240,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,10240,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,8192,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,8192,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,65536,0.08441600203514099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,65536,0.104032002389431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,8192,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,7168,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,16384,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,7168,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,7168,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,6144,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,6144,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,5120,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,6144,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,5120,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,5120,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,4096,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,65536,0.11999999731779099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,3584,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,4096,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,4096,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,3584,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,3072,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,3072,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,2560,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,2048,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,1536,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,1024,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,1024,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2560,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2560,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2560,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,12288,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,12288,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,16384,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,16384,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,12288,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,16384,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,10240,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,10240,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,10240,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,8192,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,8192,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,7168,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,8192,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,65536,0.09596800059080124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,65536,0.09167999774217606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,7168,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,6144,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,6144,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,7168,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,5120,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,6144,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,5120,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,5120,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,3584,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,4096,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,3584,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,65536,0.11830399930477142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,2560,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,2048,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,1536,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,128,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,2048,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,2048,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,2048,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,12288,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,12288,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,16384,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,16384,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,16384,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,10240,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,12288,0.047648001462221146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,10240,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,8192,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,8192,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,8192,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,7168,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,65536,0.06806399673223495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,65536,0.0777600035071373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,6144,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,6144,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,6144,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,5120,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,4096,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,4096,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,5120,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,3584,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,3584,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,65536,0.1170239970088005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,3584,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,2560,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,3072,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,2048,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1536,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1536,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1536,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,12288,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,12288,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,16384,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,16384,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,16384,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,12288,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,10240,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,8192,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,7168,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,8192,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,65536,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,65536,0.06143999844789505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,7168,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,6144,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,7168,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,5120,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,8192,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,6144,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,5120,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,4096,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,65536,0.11689600348472595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,5120,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,3584,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,3072,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,2560,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,2048,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,2560,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,2048,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,1536,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,768,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,1024,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,1024,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,1024,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,12288,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,12288,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,16384,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,16384,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,16384,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,12288,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,10240,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,10240,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,8192,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,8192,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,8192,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,65536,0.05350400134921074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,65536,0.056063998490571976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,7168,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,7168,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,5120,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,6144,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,5120,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,6144,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,5120,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,4096,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,3584,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,65536,0.11635199934244156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,3584,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,2560,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,2560,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,2048,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,768,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,768,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,12288,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,16384,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,16384,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,768,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,16384,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,12288,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,12288,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,10240,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,10240,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,10240,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,8192,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,65536,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,65536,0.05539200082421303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,7168,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,8192,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,7168,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,6144,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,6144,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,5120,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,5120,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,5120,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,65536,0.11686400324106216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,3584,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,4096,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,3584,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,3072,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,1024,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,512,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,512,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,512,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,12288,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,12288,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,16384,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,16384,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,16384,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,12288,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,10240,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,10240,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,8192,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,10240,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,8192,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,7168,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,65536,0.0461760014295578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,8192,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,65536,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,7168,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,7168,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,6144,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,5120,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,5120,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,6144,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,5120,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,65536,0.1167680025100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,4096,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,3072,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,3584,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,3072,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,2560,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,2048,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,1024,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,1536,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,1024,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,256,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,256,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,256,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,12288,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,12288,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,16384,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,16384,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,16384,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,12288,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,10240,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,8192,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,8192,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,10240,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,7168,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,65536,0.06115199998021126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,7168,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,65536,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,8192,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,6144,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,7168,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,6144,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,5120,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,4096,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,5120,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,3584,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,65536,0.11599999666213989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,4096,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,3584,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,3072,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,3072,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,2560,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,2048,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,1536,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,128,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,128,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,128,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,12288,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,12288,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,16384,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,16384,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,16384,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,12288,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,10240,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,10240,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,10240,0.036607999354600906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,8192,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,8192,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,8192,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,7168,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,65536,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,7168,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,7168,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,6144,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,6144,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,5120,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,5120,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,4096,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,65536,0.11503999680280685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,4096,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,3584,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,3584,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,65536,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,3072,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,2560,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,1536,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,768,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,64,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,64,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,64,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,12288,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,12288,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,16384,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,16384,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,16384,0.05020799860358238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,12288,0.042688000947237015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,10240,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,10240,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,10240,0.033695999532938004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,8192,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,65536,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,65536,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,8192,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,7168,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,8192,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,7168,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,7168,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,5120,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,6144,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,6144,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,6144,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,5120,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,65536,0.11203200370073318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,4096,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,4096,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,3584,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,3584,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,3072,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,2560,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,2048,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,1536,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,512,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,256,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,96,32,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,64,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,96,32,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,96,32,32,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,12288,0.31308799982070923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,10240,0.27215999364852905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,12288,0.3713279962539673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,16384,0.43171200156211853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,10240,0.2845439910888672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,8192,0.21769599616527557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,12288,0.3089919984340668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,16384,0.41948801279067993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,8192,0.2317119985818863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,10240,0.2587839961051941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,7168,0.1908160001039505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,16384,0.3977920114994049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,6144,0.2033279985189438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,7168,0.18688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,6144,0.16601599752902985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,7168,0.22220799326896667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,8192,0.2085759937763214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,5120,0.1425279974937439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,5120,0.1653120070695877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,4096,0.11628799885511398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,3584,0.10089600086212158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,6144,0.1624000072479248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,4096,0.13753600418567657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,4096,0.11235199868679047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,3584,0.12249600142240524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,5120,0.13865600526332855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,3072,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,3072,0.08924800157546997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,2560,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,3584,0.10236799716949463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,2560,0.0896959975361824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,2048,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,2048,0.07241600006818771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,2560,0.07894399762153625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,3072,0.09206400066614151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,1536,0.05951999872922897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,1536,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,1536,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,2048,0.06540799885988235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,1024,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,1024,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,768,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,768,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,1024,0.04137599840760231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,256,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,512,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,768,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,512,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,256,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,512,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,128,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,256,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,128,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,64,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,128,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,64,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,64,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,65536,32,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,65536,32,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,65536,32,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,12288,0.10278400033712387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,16384,0.10851199924945831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,16384,0.12960000336170197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,16384,0.11363200098276138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,10240,0.0880960002541542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,12288,0.09440000355243683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,10240,0.07513599842786789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,12288,0.08566399663686752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,8192,0.0899519994854927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,8192,0.0695360004901886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,10240,0.08054400235414505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,65536,0.4268159866333008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,8192,0.06284800171852112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,7168,0.06492800265550613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,7168,0.055743999779224396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,6144,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,6144,0.05587200075387955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,7168,0.05750399827957153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,5120,0.04838399961590767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,5120,0.041439998894929886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,65536,0.49983999133110046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,6144,0.0533440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,4096,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,5120,0.044576000422239304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,4096,0.05011200159788132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,3584,0.04854400083422661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,4096,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,3584,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,3584,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,3072,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,3072,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,2560,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,3072,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,2560,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,2560,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,2048,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,1536,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,2048,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,1536,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,1536,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,2048,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,1024,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,65536,0.3970879912376404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,1024,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,1024,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,768,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,768,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,768,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,512,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,512,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,512,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,256,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,256,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,64,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,16384,32,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,64,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,16384,32,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,16384,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,12288,0.07913599908351898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,12288,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,16384,0.10400000214576721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,16384,0.12697599828243256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,10240,0.08422400057315826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,16384,0.09011200070381165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,10240,0.06896000355482101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,12288,0.07414399832487106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,10240,0.0650240033864975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,7168,0.0506879985332489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,8192,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,8192,0.0666240006685257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,8192,0.05651199817657471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,65536,0.3872640132904053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,7168,0.05071999877691269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,6144,0.061983998864889145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,6144,0.05152000114321709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,65536,0.4099520146846771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,5120,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,6144,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,7168,0.04467200115323067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,5120,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,4096,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,4096,0.035392001271247864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,3584,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,3584,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,5120,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,4096,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,3584,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,3072,0.03577600046992302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,3072,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,2560,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,2560,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,3072,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,2048,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,2560,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,2048,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,1536,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,65536,0.34086400270462036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,1536,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,2048,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,1536,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,1024,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,1024,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,1024,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,768,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,768,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,768,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,512,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,512,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,256,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,128,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,32,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,12288,32,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,12288,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,12288,256,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,12288,0.0926079973578453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,16384,0.10480000078678131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,16384,0.11840000003576279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,12288,0.08230400085449219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,16384,0.08377599716186523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,10240,0.07321599870920181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,12288,0.06966400146484375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,10240,0.06982400268316269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,8192,0.06275200098752975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,10240,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,65536,0.4061119854450226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,8192,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,7168,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,7168,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,8192,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,7168,0.04505600035190582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,65536,0.41491198539733887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,6144,0.04569600149989128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,6144,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,6144,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,5120,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,5120,0.052799999713897705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,5120,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,4096,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,4096,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,3584,0.030848000198602676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,4096,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,3584,0.04073600098490715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,3072,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,3072,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,3584,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,3072,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,2560,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,65536,0.31407999992370605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,2560,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,2560,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,2048,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,2048,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,2048,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,1536,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,1536,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,1536,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,1024,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,1024,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,768,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,768,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,512,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,256,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,128,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,10240,32,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,10240,32,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,10240,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,12288,0.051263999193906784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,12288,0.061503998935222626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,16384,0.06335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,16384,0.09772799909114838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,16384,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,10240,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,12288,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,10240,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,10240,0.04396799951791763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,8192,0.048287998884916306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,8192,0.04447999969124794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,7168,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,7168,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,8192,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,65536,0.24057599902153015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,7168,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,65536,0.2804799973964691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,6144,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,5120,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,6144,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,6144,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,4096,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,4096,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,5120,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,4096,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,3584,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,3584,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,3072,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,65536,0.21660800278186798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,5120,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,3072,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,2560,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,2560,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,2048,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,1536,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,2048,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,1536,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,1024,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,1024,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,1024,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,768,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,768,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,768,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,256,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,256,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,8192,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,8192,32,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,8192,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,12288,0.06835199892520905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,12288,0.07334399968385696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,16384,0.05878400057554245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,16384,0.06393600255250931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,12288,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,16384,0.05696000158786774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,10240,0.05817599967122078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,10240,0.06278400123119354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,10240,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,8192,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,8192,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,8192,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,7168,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,65536,0.2043199986219406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,7168,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,7168,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,6144,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,6144,0.04255999997258186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,5120,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,6144,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,65536,0.2192319929599762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,5120,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,5120,0.04179200157523155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,4096,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,4096,0.036768000572919846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,3584,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,3584,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,65536,0.2123199999332428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,3072,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,3072,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,3584,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,2560,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,2560,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,2048,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,2048,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,1536,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,2048,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,1536,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,1024,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,1024,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,1024,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,768,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,768,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,7168,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,7168,32,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,12288,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,16384,0.056703999638557434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,16384,0.08940800279378891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,7168,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,12288,0.04646399989724159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,16384,0.05676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,12288,0.04649600014090538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,10240,0.04531199857592583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,10240,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,10240,0.040832001715898514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,8192,0.05135999992489815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,7168,0.04572800174355507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,65536,0.17759999632835388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,8192,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,8192,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,65536,0.17948800325393677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,7168,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,6144,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,6144,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,5120,0.03513599932193756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,7168,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,6144,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,5120,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,4096,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,4096,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,3584,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,5120,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,3584,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,65536,0.20604799687862396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,3072,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,3072,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,2560,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,2560,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,2048,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,2048,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,2560,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,1536,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,1536,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,1536,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,1024,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,1024,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,768,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,768,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,512,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,256,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,128,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,6144,32,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,6144,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,6144,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,12288,0.043487999588251114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,12288,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,16384,0.0891840010881424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,16384,0.05299200117588043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,16384,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,10240,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,12288,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,10240,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,10240,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,8192,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,8192,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,7168,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,8192,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,7168,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,65536,0.32767999172210693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,65536,0.1780800074338913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,6144,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,6144,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,5120,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,5120,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,5120,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,6144,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,4096,0.03497600182890892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,4096,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,3584,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,3584,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,65536,0.20336000621318817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,3072,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,3072,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,2560,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,2048,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,2048,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,1536,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,1536,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,1024,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,1024,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,768,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,1024,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,512,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,768,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,256,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,2560,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,64,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,5120,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,5120,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,5120,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,12288,0.0435199998319149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,12288,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,16384,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,16384,0.04636799916625023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,16384,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,12288,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,10240,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,10240,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,8192,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,10240,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,8192,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,7168,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,7168,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,8192,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,65536,0.11462400108575821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,65536,0.14256000518798828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,6144,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,7168,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,6144,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,5120,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,5120,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,4096,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,4096,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,5120,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,4096,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,3584,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,3584,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,65536,0.1340160071849823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,3072,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,3584,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,3072,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,3072,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,2560,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,2560,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,2048,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,2048,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,1536,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,2048,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,1024,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,1024,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,512,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,4096,32,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,4096,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,4096,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,12288,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,16384,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,16384,0.043296001851558685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,12288,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,16384,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,10240,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,10240,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,12288,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,8192,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,8192,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,10240,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,7168,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,65536,0.12992000579833984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,7168,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,8192,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,6144,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,65536,0.11055999994277954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,6144,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,6144,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,5120,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,5120,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,65536,0.12848000228405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,5120,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,4096,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,4096,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,3584,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,3584,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,3072,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,3072,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,2560,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,2560,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,2048,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,2048,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,1536,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,1536,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,1024,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3584,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3584,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3584,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,12288,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,12288,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,16384,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,16384,0.04358400031924248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,16384,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,10240,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,12288,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,10240,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,10240,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,8192,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,8192,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,8192,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,7168,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,65536,0.10915199667215347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,65536,0.0989760011434555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,6144,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,7168,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,65536,0.12240000069141388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,6144,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,6144,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,5120,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,5120,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,4096,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,4096,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,4096,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,5120,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,3584,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,3584,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,3072,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,3072,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,2560,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,2560,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,2048,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,2048,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,2048,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,1536,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,1536,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,768,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,1024,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,512,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,512,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,3072,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,3072,32,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,3072,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,12288,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,16384,0.04275200143456459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,16384,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,12288,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,16384,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,12288,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,10240,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,10240,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,8192,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,10240,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,8192,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,7168,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,65536,0.12134400010108948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,7168,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,65536,0.12080000340938568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,65536,0.08383999764919281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,6144,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,7168,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,6144,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,5120,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,5120,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,4096,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,5120,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,3584,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,4096,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,3584,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,3584,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,3072,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,2560,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,2048,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,2048,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,1536,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,1536,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2560,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2560,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2560,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,12288,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,12288,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,16384,0.037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,16384,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,16384,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,12288,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,10240,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,10240,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,8192,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,10240,0.04025600105524063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,8192,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,65536,0.07865600287914276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,7168,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,65536,0.09040000289678574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,8192,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,7168,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,7168,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,6144,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,6144,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,5120,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,4096,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,65536,0.11903999745845795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,4096,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,3584,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,4096,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,3584,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,3072,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,3072,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,2560,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,6144,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,2048,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,1536,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,1024,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,768,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,512,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,2048,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,2048,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,2048,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,12288,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,12288,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,16384,0.044704001396894455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,16384,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,16384,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,12288,0.045471999794244766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,10240,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,10240,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,10240,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,8192,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,8192,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,8192,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,7168,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,65536,0.08028800040483475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,65536,0.07804799824953079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,7168,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,6144,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,7168,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,5120,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,5120,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,3584,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,65536,0.11744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,4096,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,3072,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,3584,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,2560,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,3072,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,2560,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,2048,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,768,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,256,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1536,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1536,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,12288,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,16384,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,16384,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1536,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,16384,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,12288,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,12288,0.04416000097990036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,10240,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,10240,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,8192,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,10240,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,65536,0.06233600154519081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,8192,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,65536,0.0788159966468811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,7168,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,7168,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,8192,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,65536,0.11590400338172913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,7168,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,6144,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,6144,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,4096,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,6144,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,5120,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,4096,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,3584,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,3584,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,2560,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,3072,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,2560,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,2048,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,1024,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,1024,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,1024,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,1024,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,12288,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,12288,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,16384,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,16384,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,16384,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,12288,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,10240,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,10240,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,8192,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,8192,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,65536,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,65536,0.056543998420238495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,8192,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,7168,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,7168,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,6144,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,6144,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,6144,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,5120,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,5120,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,65536,0.11654400080442429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,4096,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,3584,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,3072,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,2560,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,2560,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,768,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,768,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,768,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,768,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,12288,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,12288,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,16384,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,16384,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,16384,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,12288,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,10240,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,8192,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,10240,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,10240,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,8192,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,65536,0.05507199838757515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,65536,0.04668800160288811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,7168,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,8192,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,7168,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,6144,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,7168,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,6144,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,5120,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,5120,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,65536,0.1167680025100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,4096,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,3584,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,3072,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,2560,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,2560,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,2048,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,512,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,512,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,512,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,12288,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,12288,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,16384,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,16384,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,12288,0.044704001396894455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,10240,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,10240,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,8192,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,8192,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,16384,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,65536,0.11561600118875504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,7168,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,8192,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,65536,0.04620800167322159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,65536,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,7168,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,7168,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,6144,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,5120,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,5120,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,4096,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,5120,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,6144,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,4096,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,3072,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,3072,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,2560,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,1024,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,256,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,256,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,256,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,12288,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,16384,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,12288,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,16384,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,16384,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,10240,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,12288,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,10240,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,10240,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,8192,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,8192,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,7168,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,65536,0.06473600119352341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,8192,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,65536,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,7168,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,7168,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,6144,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,5120,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,6144,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,5120,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,4096,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,65536,0.11670400202274323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,4096,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,3072,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,3584,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,2048,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,2560,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,2048,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,1536,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,1024,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,128,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,128,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,128,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,12288,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,12288,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,16384,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,16384,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,16384,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,12288,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,10240,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,10240,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,10240,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,8192,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,8192,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,7168,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,65536,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,65536,0.04358400031924248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,8192,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,7168,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,7168,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,6144,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,6144,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,5120,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,6144,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,5120,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,5120,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,65536,0.11580800265073776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,4096,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,3584,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,3072,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,3072,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,2560,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,2048,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,1536,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,2048,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,1024,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,768,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,512,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,64,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,64,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,64,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,64,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,64,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,12288,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,12288,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,16384,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,16384,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,12288,0.042399998754262924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,16384,0.05164799839258194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,10240,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,8192,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,65536,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,8192,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,7168,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,8192,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,65536,0.04412800073623657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,7168,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,6144,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,7168,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,65536,0.19142399728298187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,10240,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,3584,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,5120,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,4096,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,3584,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,3072,0.015104000456631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,2560,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,768,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,512,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,128,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,64,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,80,32,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,80,32,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,80,32,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,12288,0.24403199553489685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,16384,0.3189760148525238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,12288,0.30428799986839294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,10240,0.20611199736595154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,10240,0.26233598589897156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,16384,0.39929598569869995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,12288,0.2858240008354187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,8192,0.16636799275875092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,8192,0.21254399418830872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,7168,0.14617599546909332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,16384,0.37676799297332764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,10240,0.23824000358581543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,7168,0.1844480037689209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,8192,0.19644799828529358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,6144,0.16019199788570404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,6144,0.12982399761676788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,5120,0.10892800241708755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,7168,0.17321600019931793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,5120,0.13728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,4096,0.08959999680519104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,4096,0.11001600325107574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,6144,0.15091200172901154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,3584,0.10041599720716476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,3584,0.07968000322580338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,5120,0.12902399897575378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,3072,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,4096,0.10576000064611435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,3072,0.08444800227880478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,3584,0.0963520035147667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,2560,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,3072,0.08416000008583069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,2560,0.07280000299215317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,2048,0.05164799839258194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,2048,0.060896001756191254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,2560,0.07283200323581696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,1536,0.042399998754262924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,1536,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,2048,0.05990400165319443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,1024,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,1024,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,1536,0.04864000156521797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,768,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,768,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,1024,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,512,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,512,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,768,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,256,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,512,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,256,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,128,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,256,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,128,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,128,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,64,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,65536,32,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,32,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,65536,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,12288,0.08470399677753448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,65536,64,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,16384,0.11046399921178818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,16384,0.10867200046777725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,12288,0.08483199775218964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,16384,0.09993600100278854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,10240,0.0719359964132309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,12288,0.07718399912118912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,10240,0.07295999675989151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,8192,0.05990400165319443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,10240,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,8192,0.05984000116586685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,65536,0.32681599259376526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,7168,0.05206400156021118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,8192,0.055743999779224396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,7168,0.05500800162553787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,6144,0.0459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,6144,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,7168,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,65536,0.39849600195884705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,5120,0.03372799977660179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,5120,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,6144,0.04614400118589401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,4096,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,4096,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,5120,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,3584,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,3584,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,4096,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,3072,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,3584,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,3072,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,3072,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,2560,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,2560,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,2048,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,2048,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,1536,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,2048,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,2560,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,65536,0.3741759955883026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,1536,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,1024,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,1024,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,1536,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,768,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,1024,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,512,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,768,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,512,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,16384,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,16384,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,16384,32,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,12288,0.06886400282382965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,12288,0.06684800237417221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,16384,0.08975999802350998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,16384,0.08441600203514099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,16384,0.0790719985961914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,10240,0.056992001831531525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,10240,0.05910399928689003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,12288,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,8192,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,8192,0.04944000020623207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,10240,0.05270399898290634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,8192,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,7168,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,65536,0.3322240114212036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,7168,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,6144,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,7168,0.04294399917125702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,6144,0.03846399858593941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,5120,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,6144,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,5120,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,4096,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,4096,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,5120,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,3584,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,4096,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,3584,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,65536,0.31004801392555237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,65536,0.2858240008354187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,3072,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,3072,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,3072,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,2560,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,2560,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,2048,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,2560,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,2048,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,1536,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,1536,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,3584,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,2048,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,1024,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,1024,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,1536,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,1024,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,768,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,512,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,512,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,512,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,64,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,12288,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,12288,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,12288,32,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,12288,0.06595200300216675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,12288,0.0692799985408783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,16384,0.09014400094747543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,16384,0.08371199667453766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,16384,0.0687360018491745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,10240,0.05926400050520897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,10240,0.05619199946522713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,12288,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,10240,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,8192,0.046560000628232956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,8192,0.05023999884724617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,7168,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,7168,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,8192,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,65536,0.3171840012073517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,7168,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,6144,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,6144,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,5120,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,5120,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,6144,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,4096,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,5120,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,4096,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,3584,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,3584,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,65536,0.3136959969997406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,4096,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,65536,0.24793599545955658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,3072,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,3072,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,3584,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,2560,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,2560,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,3072,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,2560,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,2048,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,2048,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,1536,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,2048,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,1536,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,1024,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,1536,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,1024,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,1024,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,768,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,512,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,512,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,10240,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,10240,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,10240,32,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,12288,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,16384,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,16384,0.06697600334882736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,16384,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,10240,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,12288,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,10240,0.04816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,8192,0.04278400167822838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,8192,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,12288,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,65536,0.16812799870967865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,65536,0.2224320024251938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,7168,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,7168,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,6144,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,6144,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,7168,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,6144,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,5120,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,4096,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,5120,0.038943998515605927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,5120,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,3584,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,4096,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,65536,0.19961600005626678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,3584,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,2560,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,2048,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,2560,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,1536,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,1536,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,1024,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,768,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,512,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,8192,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,8192,32,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,8192,32,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,12288,0.0416640006005764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,12288,0.050175998359918594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,16384,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,16384,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,16384,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,12288,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,10240,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,10240,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,8192,0.03670400008559227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,10240,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,8192,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,8192,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,7168,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,7168,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,65536,0.15004800260066986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,6144,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,7168,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,65536,0.19756799936294556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,5120,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,6144,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,5120,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,5120,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,4096,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,4096,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,4096,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,3584,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,65536,0.1812800019979477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,2560,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,128,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,64,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,7168,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,7168,32,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,7168,1536,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,12288,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,12288,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,16384,0.0544000007212162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,16384,0.07152000069618225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,16384,0.047200001776218414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,10240,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,12288,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,10240,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,10240,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,8192,0.036448001861572266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,8192,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,8192,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,7168,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,65536,0.1308159977197647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,7168,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,7168,0.02707199938595295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,65536,0.1746560037136078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,6144,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,6144,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,5120,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,5120,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,6144,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,4096,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,5120,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,3584,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,3584,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,3072,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,65536,0.1629440039396286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,3072,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,2560,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,2048,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,2048,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,2048,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,1536,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,1536,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,1024,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,1024,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,512,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,64,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,6144,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,6144,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,6144,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,12288,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,12288,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,16384,0.04131200164556503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,16384,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,16384,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,12288,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,10240,0.036959998309612274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,10240,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,8192,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,8192,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,7168,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,8192,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,7168,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,65536,0.11273600161075592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,65536,0.26659199595451355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,7168,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,6144,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,6144,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,5120,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,5120,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,10240,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,4096,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,3584,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,4096,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,65536,0.14857600629329681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,3584,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,3584,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,3072,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,3072,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,2048,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,2560,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,2048,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,1536,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,1024,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,1024,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,768,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,768,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,64,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,64,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,5120,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,5120,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,5120,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,12288,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,12288,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,16384,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,16384,0.03782400116324425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,16384,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,12288,0.03232000023126602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,10240,0.034623999148607254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,10240,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,8192,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,10240,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,8192,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,8192,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,7168,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,65536,0.09504000097513199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,65536,0.1297599971294403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,7168,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,6144,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,7168,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,6144,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,5120,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,5120,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,6144,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,4096,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,5120,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,4096,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,4096,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,3584,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,3072,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,65536,0.1364479959011078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,3072,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,2560,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,2048,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,64,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,4096,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,4096,32,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,4096,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,12288,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,16384,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,16384,0.04243199899792671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,12288,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,16384,0.037728000432252884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,12288,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,10240,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,10240,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,8192,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,10240,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,8192,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,7168,0.031647998839616776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,65536,0.08591999858617783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,8192,0.03404799848794937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,65536,0.11177600175142288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,7168,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,6144,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,6144,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,7168,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,5120,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,5120,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,6144,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,65536,0.1302720010280609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,5120,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,4096,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,3584,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,3072,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,2560,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,2048,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,768,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3584,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3584,32,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3584,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,12288,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,12288,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,16384,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,16384,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,16384,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,12288,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,10240,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,10240,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,10240,0.040352001786231995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,8192,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,8192,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,7168,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,8192,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,7168,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,65536,0.11011199653148651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,65536,0.10070399940013885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,7168,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,6144,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,5120,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,5120,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,4096,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,6144,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,5120,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,65536,0.12435200065374374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,3584,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,2560,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,3072,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,2560,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,2048,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,1536,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,4096,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,3072,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,3072,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,64,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,3072,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,12288,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,12288,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,16384,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,16384,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,16384,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,12288,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,10240,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,10240,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,10240,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,8192,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,8192,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,65536,0.06825599819421768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,7168,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,65536,0.0888959988951683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,8192,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,7168,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,6144,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,7168,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,6144,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,5120,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,6144,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,5120,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,4096,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,65536,0.1226240023970604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,3584,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,4096,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,3072,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,3072,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,2560,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,2048,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,2048,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,1536,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,768,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,64,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2560,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2560,32,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2560,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,12288,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,12288,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,16384,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,16384,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,16384,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,12288,0.04556800052523613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,10240,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,10240,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,10240,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,8192,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,8192,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,8192,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,7168,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,65536,0.07887999713420868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,65536,0.07664000242948532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,7168,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,6144,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,6144,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,6144,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,5120,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,4096,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,4096,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,3072,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,2560,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,2048,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,2560,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,2048,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,1024,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,768,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,65536,0.12054400146007538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,512,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,256,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,64,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,64,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,64,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,2048,32,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,2048,32,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,2048,32,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,12288,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,12288,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,16384,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,16384,0.036959998309612274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,12288,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,10240,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,10240,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,10240,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,8192,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,8192,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,8192,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,7168,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,65536,0.05462399870157242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,7168,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,65536,0.06585600227117538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,6144,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,5120,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,6144,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,5120,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,4096,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,65536,0.11894399672746658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,3584,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,3072,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,2048,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,1024,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,512,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,64,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,2048,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1536,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1536,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1536,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,12288,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,12288,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,16384,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,16384,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,16384,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,12288,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,10240,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,10240,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,8192,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,10240,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,8192,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,7168,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,7168,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,65536,0.050783999264240265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,8192,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,65536,0.06304000318050385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,6144,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,6144,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,5120,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,4096,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,4096,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,6144,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,5120,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,5120,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,65536,0.11852800101041794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,4096,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,3072,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,3072,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,3584,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,2560,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,2560,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,2048,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,1536,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,768,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,1024,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,768,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,256,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,64,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,1024,64,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,1024,32,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,1024,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,12288,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,16384,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,16384,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,16384,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,12288,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,10240,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,10240,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,8192,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,65536,0.062144000083208084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,10240,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,8192,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,65536,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,12288,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,8192,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,7168,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,7168,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,6144,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,6144,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,7168,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,5120,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,6144,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,5120,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,4096,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,65536,0.11843200027942657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,5120,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,3584,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,4096,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,3584,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,3072,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,2560,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,3072,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,2048,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,2560,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,2048,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,64,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,64,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,768,32,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,768,32,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,768,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,12288,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,12288,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,16384,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,16384,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,16384,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,12288,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,10240,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,10240,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,8192,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,8192,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,7168,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,65536,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,8192,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,65536,0.04390399903059006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,7168,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,6144,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,7168,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,6144,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,5120,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,6144,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,5120,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,65536,0.11753600090742111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,4096,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,3584,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,3072,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,3072,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,2560,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,2048,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,2048,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,2048,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,768,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,512,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,64,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,64,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,512,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,512,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,12288,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,512,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,16384,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,16384,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,12288,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,16384,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,12288,0.044064000248909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,10240,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,10240,0.02956799976527691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,8192,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,10240,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,8192,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,65536,0.052928000688552856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,7168,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,7168,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,65536,0.03948799893260002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,7168,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,6144,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,6144,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,5120,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,65536,0.11715199798345566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,4096,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,4096,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,3584,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,3584,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,2560,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,3072,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,2560,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,2048,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,2560,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,2048,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,2048,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,1536,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,768,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,768,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,64,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,256,32,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,64,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,256,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,256,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,12288,0.03407999873161316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,12288,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,16384,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,16384,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,12288,0.044256001710891724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,10240,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,10240,0.030592000111937523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,8192,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,10240,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,8192,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,8192,0.03152000159025192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,65536,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,65536,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,7168,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,6144,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,7168,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,6144,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,5120,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,65536,0.11699199676513672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,4096,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,4096,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,3584,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,3584,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,3584,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,3072,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,2560,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,3072,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,2560,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,5120,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,2048,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,2048,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,1024,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,1536,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,768,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,768,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,64,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,128,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,128,32,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,128,32,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,12288,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,12288,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,16384,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,16384,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,16384,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,12288,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,10240,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,10240,0.030368000268936157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,8192,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,10240,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,8192,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,8192,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,65536,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,65536,0.04265600070357323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,7168,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,6144,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,6144,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,6144,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,65536,0.11689600348472595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,4096,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,4096,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,4096,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,5120,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,3584,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,3584,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,3584,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,3072,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,2560,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,2560,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,2048,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,1024,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,768,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,64,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,64,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,64,32,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,64,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,12288,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,16384,0.03046399913728237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,16384,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,16384,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,12288,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,64,32,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,12288,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,10240,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,10240,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,10240,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,65536,0.04243199899792671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,8192,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,65536,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,8192,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,8192,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,7168,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,6144,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,6144,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,7168,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,7168,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,5120,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,6144,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,5120,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,65536,0.19247999787330627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,4096,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,4096,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,5120,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,3584,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,3072,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,3584,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,3072,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,2560,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,2048,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,2048,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,1536,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,1024,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,1024,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,1024,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,64,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,64,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,64,32,32,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,64,32,32,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,64,32,32,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,12288,0.23984000086784363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,10240,0.2024639993906021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,12288,0.31484800577163696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,10240,0.2563199996948242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,16384,0.3203200101852417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,16384,0.4041920006275177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,12288,0.28512001037597656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,8192,0.2062399983406067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,8192,0.16502399742603302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,10240,0.2385600060224533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,16384,0.3715839982032776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,7168,0.147039994597435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,6144,0.12809599936008453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,7168,0.18531200289726257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,8192,0.1940159946680069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,6144,0.1600320041179657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,7168,0.17107200622558594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,5120,0.10921599715948105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,4096,0.09228800237178802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,4096,0.10995200276374817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,5120,0.12857599556446075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,6144,0.14947199821472168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,5120,0.1356479972600937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,3584,0.0806720033288002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,4096,0.10374400019645691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,3584,0.0989760011434555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,3072,0.08671999722719193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,3072,0.07206399738788605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,3584,0.09276799857616425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,2560,0.06195199862122536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,2048,0.06358399987220764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,3072,0.08182399719953537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,2560,0.07132799923419952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,2048,0.052960000932216644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,1536,0.043327998369932175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,1536,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,2048,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,1024,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,1024,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,1536,0.04726399853825569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,768,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,2560,0.07606399804353714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,768,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,1024,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,768,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,512,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,512,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,512,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,256,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,256,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,128,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,256,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,128,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,64,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,128,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,64,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,65536,32,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,64,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,65536,32,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,65536,32,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,12288,0.06963200122117996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,12288,0.08607999980449677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,16384,0.10992000252008438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,16384,0.11148799955844879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,16384,0.09967999905347824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,12288,0.07929600030183792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,10240,0.07091200351715088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,10240,0.0753600001335144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,10240,0.06889600306749344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,8192,0.05967999994754791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,8192,0.04879999905824661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,7168,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,8192,0.05711999908089638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,7168,0.05244800075888634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,65536,0.33049601316452026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,6144,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,6144,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,7168,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,5120,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,5120,0.04383999854326248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,6144,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,4096,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,4096,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,65536,0.4015040099620819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,5120,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,3584,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,4096,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,3584,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,3072,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,3072,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,3584,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,2560,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,2560,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,3072,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,2560,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,2048,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,2048,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,2048,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,1536,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,65536,0.3703039884567261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,1536,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,1536,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,1024,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,1024,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,1024,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,768,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,768,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,768,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,512,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,512,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,512,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,256,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,256,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,128,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,128,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,16384,32,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,16384,32,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,16384,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,12288,0.06838399916887283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,16384,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,16384,0.08460800349712372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,16384,0.07891199737787247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,10240,0.058368001133203506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,12288,0.06435199826955795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,10240,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,12288,0.0679360032081604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,8192,0.04831999912858009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,10240,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,8192,0.047200001776218414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,65536,0.33087998628616333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,7168,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,8192,0.04483199864625931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,7168,0.04320000112056732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,6144,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,6144,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,7168,0.04016000032424927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,5120,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,6144,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,5120,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,4096,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,4096,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,65536,0.3118079900741577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,5120,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,3584,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,3584,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,4096,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,3584,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,65536,0.2847039997577667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,3072,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,3072,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,2560,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,3072,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,2560,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,2560,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,2048,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,2048,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,2048,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,1536,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,1536,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,1536,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,1024,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,1024,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,768,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,768,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,512,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,512,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,256,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,128,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,12288,32,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,64,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,12288,32,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,12288,32,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,12288,0.06380800157785416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,12288,0.06838399916887283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,16384,0.08879999816417694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,16384,0.08303999900817871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,16384,0.06732799857854843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,10240,0.05689600110054016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,12288,0.055296000093221664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,10240,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,8192,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,10240,0.04979199916124344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,8192,0.04819199815392494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,7168,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,7168,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,8192,0.03932800143957138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,65536,0.3195520043373108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,7168,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,6144,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,6144,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,5120,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,65536,0.31059199571609497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,5120,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,6144,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,4096,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,5120,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,4096,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,3584,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,3584,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,4096,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,3584,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,3072,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,3072,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,2560,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,2560,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,3072,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,65536,0.244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,2048,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,2560,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,2048,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,1536,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,2048,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,1536,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,1024,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,1024,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,1536,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,1024,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,768,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,512,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,768,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,256,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,128,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,64,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,32,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,10240,32,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,10240,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,10240,768,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,12288,0.05040000006556511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,12288,0.04444799944758415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,16384,0.05417599901556969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,16384,0.0679360032081604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,16384,0.055776000022888184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,12288,0.04854400083422661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,10240,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,10240,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,10240,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,8192,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,8192,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,65536,0.17043200135231018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,7168,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,7168,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,8192,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,65536,0.22035199403762817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,6144,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,7168,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,6144,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,5120,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,6144,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,4096,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,5120,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,5120,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,4096,0.020160000771284103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,3584,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,3584,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,3072,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,65536,0.19855999946594238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,3072,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,2560,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,2048,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,2048,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,1536,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,1536,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,1024,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,768,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,768,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,512,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,256,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,8192,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,8192,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,8192,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,12288,0.04940799996256828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,12288,0.04047999903559685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,16384,0.050303999334573746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,16384,0.06115199998021126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,16384,0.051263999193906784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,12288,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,10240,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,10240,0.036959998309612274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,10240,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,8192,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,8192,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,65536,0.15225599706172943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,8192,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,6144,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,7168,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,65536,0.19593599438667297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,6144,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,7168,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,6144,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,5120,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,5120,0.04073600098490715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,4096,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,4096,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,65536,0.17750400304794312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,4096,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,3584,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,7168,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,3584,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,3072,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,3072,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,3072,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,2560,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,2048,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,2048,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,1536,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,768,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,1024,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,768,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,7168,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,7168,32,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,7168,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,12288,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,12288,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,16384,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,16384,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,16384,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,12288,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,10240,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,10240,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,10240,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,8192,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,8192,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,8192,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,65536,0.13411200046539307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,7168,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,7168,0.035999998450279236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,6144,0.03203200176358223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,6144,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,7168,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,5120,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,65536,0.16697600483894348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,5120,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,5120,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,4096,0.033663999289274216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,4096,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,3584,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,3584,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,3584,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,3072,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,65536,0.15744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,3072,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,3072,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,2560,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,2560,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,2048,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,2560,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,2048,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,1536,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,1024,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,1024,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,1024,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,256,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,64,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,6144,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,6144,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,12288,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,16384,0.07206399738788605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,16384,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,6144,32,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,16384,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,12288,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,12288,0.03651199862360954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,10240,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,10240,0.04742399975657463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,8192,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,8192,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,10240,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,65536,0.11299200356006622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,8192,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,7168,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,65536,0.14547200500965118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,7168,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,6144,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,6144,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,5120,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,6144,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,5120,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,4096,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,5120,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,4096,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,65536,0.14153599739074707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,3584,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,4096,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,3072,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,3584,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,3072,0.027648000046610832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,2560,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,3072,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,2560,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,2048,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,2560,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,1536,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,2048,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,1024,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,768,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,1024,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,512,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,5120,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,5120,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,5120,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,12288,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,12288,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,16384,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,16384,0.04310400038957596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,16384,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,12288,0.03356799855828285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,10240,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,10240,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,8192,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,10240,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,7168,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,7168,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,8192,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,65536,0.09878399968147278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,65536,0.1215360015630722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,7168,0.03219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,6144,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,6144,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,5120,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,5120,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,6144,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,5120,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,4096,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,4096,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,65536,0.13251200318336487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,4096,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,3584,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,3072,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,3072,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,2560,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,2560,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,2048,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,3072,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,4096,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,4096,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,4096,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,12288,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,12288,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,16384,0.03619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,16384,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,16384,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,12288,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,10240,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,10240,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,10240,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,8192,0.029983999207615852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,8192,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,8192,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,7168,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,7168,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,65536,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,6144,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,65536,0.11343999952077866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,7168,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,6144,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,5120,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,6144,0.0272000003606081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,5120,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,5120,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,4096,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,4096,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,3584,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,65536,0.12572799623012543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,3072,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,2560,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,2048,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,2560,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,2048,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,1536,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,1024,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,512,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3584,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3584,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3584,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,12288,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,16384,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,16384,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,16384,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,12288,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,10240,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,10240,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,10240,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,8192,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,12288,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,65536,0.07772800326347351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,65536,0.09843199700117111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,8192,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,8192,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,7168,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,7168,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,6144,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,6144,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,7168,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,5120,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,6144,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,5120,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,4096,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,65536,0.12089599668979645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,4096,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,3584,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,3584,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,3072,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,2560,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,2048,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,1024,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,3072,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,3072,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,3072,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,12288,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,12288,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,16384,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,16384,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,16384,0.03587200120091438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,12288,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,10240,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,10240,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,10240,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,8192,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,7168,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,8192,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,65536,0.06972800195217133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,7168,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,6144,0.028831999748945236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,7168,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,65536,0.0902400016784668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,6144,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,5120,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,6144,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,5120,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,4096,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,4096,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,65536,0.11779200285673141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,4096,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,5120,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,3072,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,3072,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,2560,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,2048,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,1536,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,1024,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,64,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2560,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2560,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2560,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,12288,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,16384,0.03638400137424469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,16384,0.03177599981427193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,12288,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,12288,0.04713600128889084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,10240,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,10240,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,8192,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,8192,0.028416000306606293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,7168,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,65536,0.06224000081419945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,7168,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,65536,0.0793600007891655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,8192,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,6144,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,7168,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,5120,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,6144,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,6144,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,5120,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,4096,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,65536,0.11526399850845337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,4096,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,3584,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,3072,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,2560,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,1536,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,512,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,2048,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,2048,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,2048,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,12288,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,12288,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,16384,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,16384,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,16384,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,12288,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,10240,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,10240,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,8192,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,7168,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,65536,0.06460800021886826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,10240,0.025696000084280968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,8192,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,65536,0.06739199906587601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,6144,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,7168,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,6144,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,5120,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,7168,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,6144,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,8192,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,65536,0.11433599889278412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,4096,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,5120,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,3584,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,3584,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,2560,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,3072,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,2560,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,1536,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,512,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,768,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1536,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1536,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1536,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,12288,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,12288,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,16384,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,16384,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,16384,0.05459199845790863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,12288,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,10240,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,10240,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,8192,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,8192,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,10240,0.0398080013692379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,8192,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,65536,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,7168,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,65536,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,6144,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,6144,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,5120,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,6144,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,5120,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,5120,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,65536,0.11446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,4096,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,3584,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,3072,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,3584,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,3072,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,2560,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,2560,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,2048,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,1536,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,1024,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,1024,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,12288,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,16384,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,16384,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,1024,128,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,16384,0.05484800040721893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,12288,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,12288,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,10240,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,10240,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,8192,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,10240,0.038943998515605927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,8192,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,7168,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,65536,0.046560000628232956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,65536,0.047520000487565994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,8192,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,6144,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,6144,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,5120,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,65536,0.11484800279140472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,6144,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,4096,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,5120,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,5120,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,4096,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,3584,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,3584,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,3072,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,2560,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,2560,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,1536,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,1536,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,2048,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,1024,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,768,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,768,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,768,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,12288,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,16384,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,16384,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,16384,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,12288,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,10240,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,10240,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,8192,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,8192,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,8192,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,7168,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,65536,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,65536,0.046431999653577805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,7168,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,7168,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,6144,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,5120,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,6144,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,65536,0.11475200206041336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,5120,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,4096,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,3584,0.01836800016462803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,3584,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,2560,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,3072,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,2560,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,2048,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,1024,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,4096,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,512,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,512,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,512,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,12288,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,12288,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,16384,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,16384,0.03392000123858452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,16384,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,12288,0.044415999203920364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,10240,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,10240,0.029888000339269638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,10240,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,8192,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,8192,0.025887999683618546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,65536,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,8192,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,65536,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,7168,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,7168,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,7168,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,6144,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,6144,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,4096,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,5120,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,65536,0.11423999816179276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,4096,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,3584,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,3584,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,3072,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,2560,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,2560,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,1536,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,768,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,256,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,256,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,256,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,12288,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,12288,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,16384,0.03379200026392937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,16384,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,12288,0.04447999969124794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,10240,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,10240,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,8192,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,8192,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,65536,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,10240,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,65536,0.04428799822926521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,16384,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,8192,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,7168,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,7168,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,6144,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,6144,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,5120,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,65536,0.11420799791812897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,5120,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,5120,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,4096,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,3584,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,4096,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,3584,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,3072,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,2560,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,1536,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,2048,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,1536,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,1024,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,512,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,64,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,128,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,128,32,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,128,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,12288,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,16384,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,12288,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,16384,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,16384,0.03363199904561043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,12288,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,10240,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,8192,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,10240,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,8192,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,7168,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,65536,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,8192,0.03286400064826012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,65536,0.0453759990632534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,7168,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,6144,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,6144,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,6144,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,5120,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,4096,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,65536,0.1136000007390976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,4096,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,3584,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,3584,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,3072,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,3072,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,2048,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,1024,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,1536,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,1024,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,512,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,64,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,64,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,64,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,12288,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,12288,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,16384,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,16384,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,12288,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,16384,0.052319999784231186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,10240,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,10240,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,8192,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,8192,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,10240,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,8192,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,65536,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,65536,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,7168,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,7168,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,6144,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,7168,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,6144,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,5120,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,4096,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,5120,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,65536,0.11107199639081955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,3584,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,4096,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,3584,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,3584,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,3072,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,3072,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,2560,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,1024,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,768,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,48,32,32,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,48,32,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,48,32,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,12288,0.19871999323368073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,16384,0.26070401072502136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,10240,0.24886399507522583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,16384,0.39161598682403564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,12288,0.29919999837875366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,12288,0.28147199749946594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,10240,0.16847999393939972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,8192,0.13583999872207642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,8192,0.20294399559497833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,7168,0.17721599340438843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,7168,0.12150400131940842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,16384,0.36556801199913025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,10240,0.23676800727844238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,6144,0.10633599758148193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,8192,0.1932159960269928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,6144,0.15513600409030914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,5120,0.09174399822950363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,5120,0.13180799782276154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,4096,0.1069440022110939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,7168,0.17030400037765503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,4096,0.07545600086450577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,5120,0.1260479986667633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,6144,0.1467839926481247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,3584,0.0963520035147667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,3584,0.06774400174617767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,4096,0.1024319976568222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,3072,0.08323200047016144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,3072,0.05862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,3584,0.09283199906349182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,2560,0.07174400240182877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,2560,0.05321599915623665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,2048,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,2048,0.05913599953055382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,3072,0.08150400221347809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,2560,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,1536,0.04793599992990494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,1536,0.03718400001525879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,2048,0.056063998490571976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,1536,0.044415999203920364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,1024,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,1024,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,768,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,1024,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,768,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,768,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,512,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,512,0.02070399932563305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,256,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,512,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,256,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,128,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,256,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,128,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,64,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,128,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,64,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,64,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,65536,32,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,65536,32,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,65536,32,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,12288,0.08249600231647491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,12288,0.08531200140714645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,16384,0.1082879975438118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,16384,0.10435199737548828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,16384,0.09971199929714203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,10240,0.0708480030298233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,12288,0.07836800068616867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,10240,0.07011199742555618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,8192,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,8192,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,10240,0.0681919977068901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,7168,0.050944000482559204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,8192,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,65536,0.4086399972438812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,7168,0.05132799968123436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,6144,0.04678399860858917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,7168,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,6144,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,5120,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,5120,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,6144,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,65536,0.3930239975452423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,4096,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,4096,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,5120,0.03779200091958046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,3584,0.03171199932694435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,3584,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,4096,0.032735999673604965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,3584,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,3072,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,3072,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,2560,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,2560,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,3072,0.02598400041460991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,65536,0.3662720024585724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,2560,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,2048,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,2048,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,2048,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,1536,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,1536,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,1024,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,1536,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,1024,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,1024,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,768,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,768,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,512,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,256,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,64,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,64,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,32,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,16384,32,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,16384,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,16384,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,12288,0.07257600128650665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,16384,0.07516799867153168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,16384,0.09465599805116653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,12288,0.08262400329113007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,16384,0.07753600180149078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,10240,0.07014399766921997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,12288,0.06255999952554703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,10240,0.063680000603199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,8192,0.04899200052022934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,65536,0.27558401226997375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,10240,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,8192,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,7168,0.045152001082897186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,8192,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,7168,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,6144,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,6144,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,65536,0.30211201310157776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,7168,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,5120,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,6144,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,5120,0.029920000582933426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,4096,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,4096,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,5120,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,3584,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,3584,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,4096,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,3584,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,65536,0.2799679934978485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,3072,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,3072,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,2560,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,3072,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,2560,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,2560,0.020287999883294106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,2048,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,2048,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,1536,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,1536,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,2048,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,1024,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,1024,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,1536,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,1024,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,768,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,768,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,768,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,512,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,256,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,64,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,64,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,12288,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,12288,32,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,12288,32,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,12288,0.055135998874902725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,12288,0.059647999703884125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,16384,0.07468800246715546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,16384,0.07683199644088745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,16384,0.0674239993095398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,12288,0.05564799904823303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,10240,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,10240,0.0490880012512207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,10240,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,8192,0.04249599948525429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,7168,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,8192,0.041471999138593674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,65536,0.25699201226234436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,8192,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,7168,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,6144,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,6144,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,7168,0.03564799949526787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,65536,0.26627200841903687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,5120,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,6144,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,4096,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,5120,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,4096,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,3584,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,4096,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,3584,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,3584,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,3072,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,3072,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,65536,0.24505600333213806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,2560,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,2560,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,3072,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,2560,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,2048,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,2048,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,2048,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,1536,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,1536,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,1536,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,5120,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,1024,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,1024,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,768,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,1024,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,768,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,512,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,512,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,256,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,256,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,128,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,10240,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,10240,32,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,10240,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,12288,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,12288,0.054655998945236206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,16384,0.04940799996256828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,16384,0.056671999394893646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,16384,0.05660799890756607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,12288,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,10240,0.048895999789237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,10240,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,8192,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,8192,0.042080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,65536,0.14131200313568115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,7168,0.03862399980425835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,8192,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,7168,0.030751999467611313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,6144,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,65536,0.20291200280189514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,7168,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,6144,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,5120,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,5120,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,6144,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,4096,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,5120,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,4096,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,3584,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,65536,0.19625599682331085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,3072,0.024800000712275505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,2560,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,2560,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,2048,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,1536,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,1024,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,512,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,512,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,64,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,8192,32,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,8192,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,8192,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,12288,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,12288,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,16384,0.04633599892258644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,16384,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,12288,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,10240,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,10240,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,8192,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,10240,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,16384,0.05756799876689911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,65536,0.1260479986667633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,8192,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,65536,0.18838399648666382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,7168,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,7168,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,7168,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,6144,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,6144,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,5120,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,6144,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,5120,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,5120,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,4096,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,4096,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,3584,0.019680000841617584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,65536,0.17791999876499176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,3584,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,3072,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,1536,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,1536,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,1024,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,7168,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,7168,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,7168,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,12288,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,12288,0.04508800059556961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,16384,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,16384,0.05344000086188316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,16384,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,12288,0.038816001266241074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,10240,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,10240,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,10240,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,8192,0.03561599925160408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,8192,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,7168,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,8192,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,65536,0.11107199639081955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,7168,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,6144,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,65536,0.16790400445461273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,6144,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,7168,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,5120,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,6144,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,5120,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,4096,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,4096,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,5120,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,3584,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,4096,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,3584,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,65536,0.1589760035276413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,3072,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,3072,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,2048,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,2560,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,1536,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,2048,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,1536,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,1536,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,768,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,256,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,2048,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,6144,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,6144,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,6144,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,12288,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,12288,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,16384,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,16384,0.04771199822425842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,16384,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,10240,0.03763199970126152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,12288,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,10240,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,8192,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,10240,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,8192,0.025567999109625816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,7168,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,65536,0.09731200337409973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,8192,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,7168,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,65536,0.1438400000333786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,6144,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,7168,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,6144,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,5120,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,6144,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,5120,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,4096,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,4096,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,3584,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,3584,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,65536,0.14204800128936768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,3072,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,2560,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,1536,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,1536,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,1536,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,768,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,1024,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,512,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,64,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,5120,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,5120,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,5120,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,12288,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,16384,0.04275200143456459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,16384,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,16384,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,12288,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,10240,0.03283200040459633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,10240,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,10240,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,8192,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,65536,0.08163200318813324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,8192,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,65536,0.12080000340938568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,12288,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,8192,0.02566399984061718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,7168,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,5120,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,6144,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,7168,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,6144,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,5120,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,4096,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,4096,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,65536,0.12931199371814728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,5120,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,4096,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,3584,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,3072,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,2560,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,2048,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,1536,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,128,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,4096,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,4096,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,4096,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,12288,0.03302399814128876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,12288,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,16384,0.03836800158023834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,16384,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,16384,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,12288,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,10240,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,10240,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,10240,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,8192,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,8192,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,65536,0.07366400212049484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,7168,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,8192,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,65536,0.10998400300741196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,7168,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,6144,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,5120,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,7168,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,5120,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,5120,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,4096,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,4096,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,4096,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,65536,0.12387199699878693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,2560,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,3072,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,2048,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,2048,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,1024,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,1536,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,768,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,64,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3584,32,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3584,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,12288,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,16384,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3584,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,16384,0.036639999598264694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,16384,0.035711999982595444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,12288,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,10240,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,12288,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,10240,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,8192,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,10240,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,65536,0.06678400188684464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,8192,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,7168,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,65536,0.09577599912881851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,8192,0.033984001725912094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,6144,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,6144,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,7168,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,5120,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,5120,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,6144,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,4096,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,5120,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,4096,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,65536,0.11900799721479416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,3584,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,3072,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,3072,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,2560,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,1536,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,2048,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,64,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,3072,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,3072,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,3072,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,12288,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,12288,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,16384,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,16384,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,16384,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,12288,0.04710400104522705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,10240,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,10240,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,10240,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,8192,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,7168,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,8192,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,65536,0.08569599688053131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,7168,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,65536,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,7168,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,6144,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,6144,0.022272000089287758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,6144,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,5120,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,3584,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,65536,0.11635199934244156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,3584,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,3072,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,2048,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,1536,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,64,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2560,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2560,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2560,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,12288,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,12288,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,16384,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,16384,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,16384,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,12288,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,10240,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,10240,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,10240,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,8192,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,8192,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,8192,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,65536,0.07583999633789062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,7168,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,65536,0.0772479996085167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,7168,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,6144,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,7168,0.030400000512599945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,6144,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,4096,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,5120,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,4096,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,65536,0.1149120032787323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,3584,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,3072,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,3584,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,3072,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,3072,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,2048,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,2048,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,1536,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,1024,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,1024,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,64,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,2048,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,2048,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,12288,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,16384,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,16384,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,2048,64,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,12288,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,16384,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,12288,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,10240,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,10240,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,8192,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,10240,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,8192,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,65536,0.061184000223875046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,65536,0.061184000223875046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,8192,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,7168,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,7168,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,6144,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,6144,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,7168,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,5120,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,6144,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,65536,0.114656001329422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,5120,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,4096,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,3584,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,3072,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,2560,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,2048,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,2048,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,1536,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,1024,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1536,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1536,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1536,32,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,12288,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,12288,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,16384,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,16384,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,16384,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,12288,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,10240,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,10240,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,10240,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,8192,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,8192,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,8192,0.03244800120592117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,65536,0.04927999898791313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,65536,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,7168,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,6144,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,7168,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,6144,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,6144,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,5120,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,4096,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,65536,0.11324799805879593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,4096,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,4096,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,3072,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,3072,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,3584,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,2560,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,1536,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,1024,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,768,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,1024,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,1024,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,1024,32,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,12288,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,12288,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,16384,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,16384,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,16384,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,10240,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,12288,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,10240,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,8192,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,10240,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,65536,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,8192,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,8192,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,65536,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,7168,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,7168,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,6144,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,7168,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,6144,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,5120,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,5120,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,5120,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,6144,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,65536,0.11238399893045425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,3584,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,3072,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,2560,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,2560,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,2560,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,1536,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,1024,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,128,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,768,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,768,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,768,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,12288,0.029600000008940697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,16384,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,16384,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,16384,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,12288,0.04492799937725067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,10240,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,10240,0.029184000566601753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,8192,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,10240,0.03843199834227562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,12288,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,65536,0.040800001472234726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,65536,0.05984000116586685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,8192,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,7168,0.025312000885605812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,7168,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,6144,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,6144,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,7168,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,6144,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,4096,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,65536,0.11289600282907486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,5120,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,3584,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,4096,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,3584,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,3072,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,2560,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,2048,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,1536,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,1536,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,1024,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,512,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,512,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,512,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,512,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,12288,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,12288,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,16384,0.04028800129890442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,16384,0.04118400067090988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,16384,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,12288,0.04476799815893173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,10240,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,10240,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,8192,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,10240,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,8192,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,65536,0.05011200159788132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,65536,0.04739199951291084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,7168,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,7168,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,6144,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,7168,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,6144,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,5120,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,4096,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,6144,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,65536,0.11289600282907486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,5120,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,4096,0.019807999953627586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,4096,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,3584,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,3584,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,3584,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,2560,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,3072,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,2048,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,1024,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,1024,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,512,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,256,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,256,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,256,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,12288,0.03270399942994118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,16384,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,16384,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,12288,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,16384,0.055135998874902725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,12288,0.044544000178575516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,10240,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,10240,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,10240,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,8192,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,65536,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,8192,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,65536,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,8192,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,7168,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,6144,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,7168,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,5120,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,6144,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,4096,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,5120,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,4096,0.018239999189972878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,65536,0.11273600161075592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,3584,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,4096,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,3584,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,3072,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,2560,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,2048,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,1536,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,768,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,128,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,128,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,128,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,12288,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,12288,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,16384,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,16384,0.03920000046491623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,16384,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,12288,0.04355200007557869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,10240,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,10240,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,8192,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,10240,0.03859199956059456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,65536,0.04950400069355965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,8192,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,65536,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,7168,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,8192,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,7168,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,6144,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,6144,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,5120,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,5120,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,6144,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,65536,0.20083199441432953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,5120,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,4096,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,4096,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,3584,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,4096,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,3584,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,3584,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,7168,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,3072,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,3072,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,2560,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,2048,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,2048,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,1536,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,768,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,768,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,768,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,64,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,64,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,64,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,12288,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,12288,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,16384,0.03923200070858002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,16384,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,16384,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,12288,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,10240,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,10240,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,8192,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,10240,0.036159999668598175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,8192,0.026079999282956123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,7168,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,7168,0.02396799996495247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,8192,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,65536,0.048287998884916306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,65536,0.04835199937224388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,7168,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,6144,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,6144,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,5120,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,6144,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,4096,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,5120,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,65536,0.11132799834012985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,4096,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,3584,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,3072,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,3584,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,3072,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,2560,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,2560,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,2048,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,1536,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,1536,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,1024,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,256,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,256,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,32,32,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,32,32,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,32,32,64,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,12288,0.19193600118160248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,16384,0.25068798661231995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,12288,0.29840001463890076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,10240,0.24700799584388733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,10240,0.16396799683570862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,16384,0.38678398728370667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,12288,0.27884799242019653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,8192,0.13120000064373016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,8192,0.20047999918460846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,7168,0.11734399944543839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,7168,0.17769600450992584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,16384,0.36419200897216797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,10240,0.23520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,6144,0.10211200267076492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,8192,0.18860800564289093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,6144,0.15161600708961487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,5120,0.08793599903583527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,7168,0.16857600212097168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,5120,0.12857599556446075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,4096,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,4096,0.1035199984908104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,6144,0.14508800208568573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,5120,0.12371200323104858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,3584,0.09279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,3584,0.0652799978852272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,4096,0.10175999999046326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,3072,0.0575999990105629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,3072,0.08108799904584885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,3584,0.09132800251245499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,2560,0.05040000006556511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,3072,0.08035200089216232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,2560,0.0689919963479042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,2048,0.05827200040221214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,2048,0.0416640006005764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,2560,0.06719999760389328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,1536,0.04639999940991402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,2048,0.0549440011382103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,1536,0.03545600175857544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,1024,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,1024,0.034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,1536,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,768,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,1024,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,768,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,512,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,512,0.019872000440955162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,768,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,256,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,512,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,256,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,256,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,128,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,128,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,64,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,128,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,64,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,65536,32,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,65536,32,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,65536,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,12288,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,12288,0.07980799674987793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,16384,0.07311999797821045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,16384,0.10438399761915207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,16384,0.09939199686050415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,10240,0.06787200272083282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,10240,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,12288,0.07705599814653397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,10240,0.06697600334882736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,8192,0.05510399863123894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,8192,0.05305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,7168,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,7168,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,8192,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,65536,0.25436800718307495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,6144,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,6144,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,7168,0.04912000149488449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,5120,0.03951999917626381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,6144,0.043136000633239746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,65536,0.3935360014438629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,5120,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,4096,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,5120,0.03657599911093712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,4096,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,3584,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,3584,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,4096,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,3072,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,3584,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,2560,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,3072,0.02691200003027916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,2560,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,2560,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,2048,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,65536,0.36262398958206177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,2048,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,1536,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,2048,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,1536,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,1536,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,1024,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,1024,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,768,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,768,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,512,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,512,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,512,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,256,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,3072,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,256,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,64,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,64,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,16384,32,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,16384,32,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,16384,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,12288,0.0533440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,12288,0.07027199864387512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,16384,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,16384,0.08950400352478027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,16384,0.07711999863386154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,12288,0.06124800071120262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,10240,0.06054399907588959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,10240,0.047359999269247055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,10240,0.05363199859857559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,8192,0.05056000128388405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,8192,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,8192,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,7168,0.04371200129389763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,7168,0.04755200073122978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,65536,0.25417599081993103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,6144,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,7168,0.039423998445272446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,6144,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,5120,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,5120,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,6144,0.03548799827694893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,5120,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,4096,0.024224000051617622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,65536,0.29183998703956604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,4096,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,3584,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,4096,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,3584,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,3584,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,3072,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,3072,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,3072,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,2560,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,2560,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,2560,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,2048,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,2048,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,2048,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,1536,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,65536,0.2796800136566162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,1536,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,1536,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,1024,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,1024,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,768,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,512,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,768,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,1024,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,256,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,256,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,64,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,12288,32,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,12288,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,12288,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,12288,0.05753599852323532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,16384,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,16384,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,16384,0.06771200150251389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,12288,0.05484800040721893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,12288,0.053888000547885895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,10240,0.045823998749256134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,10240,0.05075199902057648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,8192,0.03852799907326698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,8192,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,7168,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,10240,0.046720001846551895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,65536,0.25011199712753296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,8192,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,7168,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,6144,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,6144,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,6144,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,7168,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,5120,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,65536,0.26870399713516235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,5120,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,4096,0.027775999158620834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,4096,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,5120,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,4096,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,3584,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,3584,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,3584,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,3072,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,3072,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,65536,0.2409919947385788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,2560,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,3072,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,2560,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,2048,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,2560,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,2048,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,1536,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,2048,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,1536,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,1024,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,1024,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,1536,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,768,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,768,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,768,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,512,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,512,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,256,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,256,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,10240,32,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,10240,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,10240,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,12288,0.05395200103521347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,12288,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,16384,0.04873599857091904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,16384,0.06409599632024765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,16384,0.0560000017285347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,10240,0.04896000027656555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,12288,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,10240,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,10240,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,8192,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,8192,0.03206399828195572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,7168,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,65536,0.13715200126171112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,7168,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,6144,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,7168,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,6144,0.02611199952661991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,65536,0.21145600080490112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,6144,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,5120,0.033440001308918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,5120,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,5120,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,4096,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,4096,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,3584,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,65536,0.19568000733852386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,2048,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,1536,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,768,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,768,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,512,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,256,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,128,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,8192,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,8192,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,8192,768,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,12288,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,16384,0.04521600157022476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,16384,0.05555199831724167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,12288,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,16384,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,12288,0.041728001087903976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,10240,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,10240,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,10240,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,8192,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,8192,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,7168,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,65536,0.1231359988451004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,7168,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,8192,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,65536,0.18774400651454926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,6144,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,7168,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,6144,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,5120,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,6144,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,5120,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,5120,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,3584,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,3584,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,3584,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,4096,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,65536,0.17523199319839478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,3072,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,3072,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,3072,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,2560,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,2560,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,1536,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,2048,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,1024,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,768,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,512,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,256,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,7168,32,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,7168,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,7168,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,12288,0.0344959981739521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,12288,0.04275200143456459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,16384,0.05369599908590317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,16384,0.04297599941492081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,16384,0.045504000037908554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,10240,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,10240,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,12288,0.038176000118255615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,10240,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,8192,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,8192,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,65536,0.10604800283908844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,8192,0.028095999732613564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,7168,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,65536,0.16553600132465363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,6144,0.03136000037193298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,6144,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,7168,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,6144,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,5120,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,4096,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,5120,0.025919999927282333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,4096,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,65536,0.15689599514007568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,7168,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,3584,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,3584,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,4096,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,3072,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,3584,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,2560,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,3072,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,2048,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,2560,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,2560,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,1536,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,768,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,1024,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,1024,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,256,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,64,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,6144,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,6144,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,6144,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,12288,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,12288,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,16384,0.03753599897027016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,16384,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,16384,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,10240,0.036896001547575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,12288,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,10240,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,10240,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,8192,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,8192,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,65536,0.09347199648618698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,7168,0.028960000723600388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,8192,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,7168,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,65536,0.13996799290180206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,6144,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,6144,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,7168,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,6144,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,5120,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,5120,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,4096,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,5120,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,65536,0.14047999680042267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,3072,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,2048,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,1536,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,2048,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,1024,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,768,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,1024,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,256,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,64,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,32,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,5120,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,5120,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,12288,0.03673600032925606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,16384,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,16384,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,5120,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,16384,0.03728000074625015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,12288,0.027008000761270523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,10240,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,12288,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,10240,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,8192,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,10240,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,7168,0.028511999174952507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,65536,0.07955200225114822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,8192,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,65536,0.11952000111341476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,8192,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,7168,0.022175999358296394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,6144,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,7168,0.031168000772595406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,6144,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,6144,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,5120,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,4096,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,4096,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,65536,0.1265919953584671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,3584,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,3584,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,3072,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,2048,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,2560,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,768,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,512,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,4096,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,4096,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,4096,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,12288,0.034015998244285583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,12288,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,16384,0.03167999908328056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,16384,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,16384,0.03622400015592575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,12288,0.03097599931061268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,10240,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,8192,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,10240,0.0297279991209507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,8192,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,7168,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,8192,0.02768000029027462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,7168,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,65536,0.07072000205516815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,65536,0.10329599678516388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,7168,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,6144,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,6144,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,6144,0.027712000533938408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,5120,0.020800000056624413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,4096,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,4096,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,65536,0.12147200107574463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,5120,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,4096,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,3584,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,3584,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,3072,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,3072,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,2560,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,1536,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,1536,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,768,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,512,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,256,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,64,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3584,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3584,32,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3584,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,12288,0.031072000041604042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,12288,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,16384,0.03625600039958954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,16384,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,16384,0.03468799963593483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,12288,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,10240,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,10240,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,8192,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,10240,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,8192,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,8192,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,7168,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,65536,0.06313599646091461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,7168,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,65536,0.09145600348711014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,7168,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,6144,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,5120,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,6144,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,4096,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,5120,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,65536,0.11740799993276596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,4096,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,3584,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,3584,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,3584,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,3072,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,3072,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,2560,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,2560,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,2048,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,2560,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,1536,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,512,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,512,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,3072,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,3072,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,3072,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,12288,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,16384,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,16384,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,16384,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,12288,0.04713600128889084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,10240,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,10240,0.02304000034928322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,12288,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,8192,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,8192,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,10240,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,65536,0.05769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,8192,0.032575998455286026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,65536,0.08204799890518188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,7168,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,7168,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,6144,0.023360000923275948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,7168,0.03001599945127964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,5120,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,5120,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,6144,0.0261439997702837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,4096,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,65536,0.115167997777462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,5120,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,4096,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,3584,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,4096,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,3584,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,3584,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,3072,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,3072,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,2560,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,1536,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,1536,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,1024,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,768,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,128,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2560,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2560,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2560,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,12288,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,12288,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,16384,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,16384,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,16384,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,12288,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,10240,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,10240,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,10240,0.03929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,8192,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,8192,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,65536,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,8192,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,65536,0.07433599978685379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,7168,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,7168,0.029952000826597214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,6144,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,6144,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,6144,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,5120,0.02332800067961216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,5120,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,5120,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,4096,0.022016000002622604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,65536,0.11423999816179276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,3584,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,3584,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,3072,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,3072,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,2048,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,2560,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,2048,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,2048,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,1536,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,1024,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,256,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,128,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,1536,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,2048,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,2048,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,2048,32,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,12288,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,12288,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,16384,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,16384,0.030047999694943428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,16384,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,12288,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,10240,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,10240,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,10240,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,8192,0.02470399998128414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,8192,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,8192,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,65536,0.05833600088953972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,65536,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,6144,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,7168,0.02908799983561039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,5120,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,5120,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,6144,0.026016000658273697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,4096,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,5120,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,4096,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,65536,0.11347199976444244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,3584,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,3584,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,4096,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,3072,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,3584,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,3072,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,3072,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,2560,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,2560,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,2048,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,2048,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,1536,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,1536,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,1024,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,768,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,512,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,512,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,128,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1536,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1536,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1536,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,12288,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,12288,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,16384,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,16384,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,16384,0.03999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,12288,0.04479999840259552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,10240,0.02969600073993206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,10240,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,8192,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,8192,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,8192,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,65536,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,7168,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,7168,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,65536,0.05276799947023392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,7168,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,5120,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,6144,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,5120,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,6144,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,65536,0.11209599673748016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,10240,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,5120,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,4096,0.020255999639630318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,3584,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,4096,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,3584,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,3584,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,3072,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,3072,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,2560,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,3072,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,2560,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,2048,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,2048,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,1536,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,1536,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,1024,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,1536,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,512,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,512,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,64,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,1024,32,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,1024,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,1024,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,12288,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,12288,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,16384,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,16384,0.033504001796245575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,16384,0.03977600112557411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,12288,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,10240,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,10240,0.029791999608278275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,10240,0.028991999104619026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,8192,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,8192,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,8192,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,7168,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,65536,0.04342399910092354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,65536,0.05104000121355057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,7168,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,7168,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,6144,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,6144,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,5120,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,6144,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,4096,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,5120,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,4096,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,65536,0.11190400272607803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,3584,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,4096,0.018688000738620758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,3584,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,3072,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,3072,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,3072,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,2560,0.016543999314308167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,2560,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,2048,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,1536,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,1024,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,1024,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,512,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,64,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,64,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,768,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,768,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,768,512,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,12288,0.03308799862861633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,16384,0.04089599847793579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,12288,0.033535998314619064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,16384,0.04163200035691261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,16384,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,12288,0.027264000847935677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,10240,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,10240,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,8192,0.02582399919629097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,10240,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,8192,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,65536,0.048448000103235245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,7168,0.02364799939095974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,65536,0.05260799825191498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,8192,0.031936001032590866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,7168,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,6144,0.02195199951529503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,6144,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,5120,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,6144,0.02518399991095066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,5120,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,5120,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,4096,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,4096,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,65536,0.11142399907112122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,3584,0.02022399939596653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,3584,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,3584,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,3072,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,2560,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,3072,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,2560,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,2560,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,2048,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,2048,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,1536,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,2048,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,768,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,256,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,512,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,512,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,512,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,512,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,12288,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,12288,0.0318400003015995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,16384,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,16384,0.04198399931192398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,16384,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,12288,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,10240,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,10240,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,10240,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,8192,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,8192,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,7168,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,8192,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,65536,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,65536,0.07203199714422226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,7168,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,7168,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,6144,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,5120,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,6144,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,6144,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,65536,0.13900800049304962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,4096,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,3584,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,3584,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,4096,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,3584,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,3072,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,2560,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,2048,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,1536,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,5120,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,1536,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,1024,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,768,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,768,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,256,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,64,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,256,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,256,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,256,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,12288,0.030880000442266464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,12288,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,16384,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,16384,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,16384,0.04022400081157684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,12288,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,10240,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,10240,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,8192,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,10240,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,8192,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,65536,0.07152000069618225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,7168,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,65536,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,8192,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,7168,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,6144,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,6144,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,5120,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,5120,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,6144,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,65536,0.1393599957227707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,5120,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,4096,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,4096,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,4096,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,3584,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,3584,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,3584,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,3072,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,2560,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,3072,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,2560,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,2048,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,1536,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,1024,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,512,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,512,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,128,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,64,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,128,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,128,32,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,128,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,12288,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,12288,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,16384,0.03891199827194214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,16384,0.038495998829603195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,16384,0.03968000039458275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,12288,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,10240,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,10240,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,8192,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,10240,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,8192,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,8192,0.024383999407291412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,65536,0.06892800331115723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,7168,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,7168,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,7168,0.022495999932289124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,6144,0.02287999913096428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,5120,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,6144,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,5120,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,4096,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,5120,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,65536,0.13926400244235992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,4096,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,3584,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,4096,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,3584,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,3584,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,3072,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,65536,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,3072,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,2560,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,3072,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,2560,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,2048,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,2560,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,1536,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,768,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,768,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,768,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,512,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,64,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,64,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,64,32,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,64,32,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,64,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,12288,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,12288,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,16384,0.038047999143600464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,16384,0.0382080003619194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,16384,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,12288,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,10240,0.028192000463604927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,10240,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,10240,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,8192,0.024512000381946564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,8192,0.024032000452280045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,65536,0.07433599978685379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,65536,0.07327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,7168,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,8192,0.03129599988460541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,7168,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,6144,0.022463999688625336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,7168,0.021536000072956085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,6144,0.022943999618291855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,6144,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,5120,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,5120,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,5120,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,4096,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,4096,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,65536,0.13526399433612823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,4096,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,3584,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,3584,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,3072,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,3072,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,3072,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,2560,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,2560,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,2560,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,2048,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,2048,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,1536,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,1024,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,768,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,128,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,64,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,64,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,16,32,32,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,16,32,32,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,16,32,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,12288,0.19251200556755066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,10240,0.1619199961423874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,16384,0.25145599246025085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,10240,0.24995200335979462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,12288,0.29740801453590393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,12288,0.2781760096549988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,8192,0.1324480026960373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,16384,0.3938559889793396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,8192,0.19993600249290466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,7168,0.17580799758434296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,16384,0.36262398958206177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,7168,0.11776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,8192,0.18755200505256653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,10240,0.23206399381160736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,6144,0.10262399911880493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,6144,0.15654399991035461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,7168,0.16633599996566772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,5120,0.0883840024471283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,4096,0.07184000313282013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,5120,0.12838399410247803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,6144,0.144896000623703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,4096,0.1048320010304451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,5120,0.12236800044775009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,3584,0.06415999680757523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,4096,0.10044799745082855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,3584,0.09340800344944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,3072,0.08079999685287476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,3072,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,3584,0.09081599861383438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,3072,0.0793600007891655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,2560,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,2560,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,2048,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,2048,0.0586559996008873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,2560,0.06787200272083282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,1536,0.04707200080156326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,1536,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,2048,0.054976001381874084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,1024,0.03388800099492073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,1024,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,1536,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,1024,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,768,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,768,0.028575999662280083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,512,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,512,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,768,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,256,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,512,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,256,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,128,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,128,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,256,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,64,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,128,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,64,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,65536,32,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,64,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,65536,32,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,65536,32,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,12288,0.07939200103282928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,16384,0.07126399874687195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,16384,0.1090880036354065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,16384,0.0981760025024414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,12288,0.07705599814653397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,10240,0.06831999868154526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,10240,0.04825599864125252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,12288,0.0551999993622303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,8192,0.041280001401901245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,8192,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,10240,0.06592000275850296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,65536,0.2529279887676239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,7168,0.05036799982190132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,8192,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,7168,0.037856001406908035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,6144,0.04527999833226204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,7168,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,6144,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,65536,0.3996480107307434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,5120,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,5120,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,6144,0.04262400045990944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,4096,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,4096,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,5120,0.03667199984192848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,3584,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,3584,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,4096,0.03062400035560131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,3584,0.02796800062060356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,65536,0.3591359853744507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,3072,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,3072,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,2560,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,2560,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,2560,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,2048,0.02223999984562397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,2048,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,2048,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,1536,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,1024,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,1536,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,1024,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,1536,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,1024,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,768,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,768,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,768,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,512,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,256,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,128,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,256,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,128,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,64,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,64,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,16384,32,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,16384,32,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,16384,32,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,12288,0.05366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,12288,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,16384,0.06896000355482101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,16384,0.08377599716186523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,16384,0.07689599692821503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,12288,0.0613120011985302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,10240,0.05913599953055382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,10240,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,10240,0.05251200124621391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,8192,0.05183999985456467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,8192,0.039455998688936234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,7168,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,8192,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,7168,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,65536,0.2548159956932068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,7168,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,6144,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,6144,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,65536,0.3089919984340668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,5120,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,6144,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,5120,0.03612799942493439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,4096,0.030079999938607216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,5120,0.0306560005992651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,4096,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,3584,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,3584,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,4096,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,3584,0.02409599907696247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,3072,0.024768000468611717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,65536,0.2789759933948517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,3072,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,3072,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,2560,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,2560,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,2560,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,2048,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,2048,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,2048,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,1536,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,1536,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,1024,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,1536,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,1024,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,768,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,768,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,512,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,512,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,256,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,256,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,64,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,64,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,32,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,12288,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,12288,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,12288,0.061535999178886414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,12288,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,16384,0.06950400024652481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,16384,0.07596799731254578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,16384,0.06659200042486191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,12288,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,12288,0.0544000007212162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,10240,0.045951999723911285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,10240,0.050175998359918594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,8192,0.043168000876903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,10240,0.04700800031423569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,8192,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,7168,0.03884800150990486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,8192,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,65536,0.25648000836372375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,7168,0.0360959991812706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,6144,0.03747199848294258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,7168,0.03494400158524513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,65536,0.2696639895439148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,6144,0.032287999987602234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,5120,0.030271999537944794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,6144,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,5120,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,5120,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,4096,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,4096,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,3584,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,3584,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,4096,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,3072,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,3584,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,65536,0.24268800020217896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,3072,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,3072,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,2560,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,2560,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,2560,0.019360000267624855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,2048,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,2048,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,1536,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,2048,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,1536,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,1024,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,1536,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,1024,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,768,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,768,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,512,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,512,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,128,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,64,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,64,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,10240,32,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,10240,32,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,10240,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,12288,0.04044799879193306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,12288,0.05427199974656105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,16384,0.04966399818658829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,16384,0.05673599988222122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,16384,0.055615998804569244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,10240,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,12288,0.04575999826192856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,10240,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,10240,0.03993599861860275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,8192,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,7168,0.031136000528931618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,8192,0.03254399821162224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,65536,0.13779200613498688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,7168,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,6144,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,65536,0.21488000452518463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,6144,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,7168,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,6144,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,5120,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,4096,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,4096,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,5120,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,3584,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,3584,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,65536,0.1947840005159378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,3072,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,3072,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,2560,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,2048,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,2048,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,2560,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,2048,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,5120,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,1536,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,1536,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,1024,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,1024,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,512,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,256,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,128,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,64,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,64,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,8192,32,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,8192,32,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,8192,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,12288,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,12288,0.04281599819660187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,16384,0.045632001012563705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,16384,0.055904000997543335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,16384,0.05075199902057648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,12288,0.0427200011909008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,10240,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,10240,0.03481600061058998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,8192,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,8192,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,10240,0.03750399872660637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,7168,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,8192,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,65536,0.12329600006341934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,7168,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,6144,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,7168,0.026240000501275063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,6144,0.026176000013947487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,65536,0.18857599794864655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,5120,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,5120,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,6144,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,5120,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,3584,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,4096,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,3584,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,3584,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,3072,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,3072,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,65536,0.1754239946603775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,2560,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,2560,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,2560,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,2048,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,2048,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,1536,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,1536,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,1536,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,1024,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,1024,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,768,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,512,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,256,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,64,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,7168,32,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,7168,32,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,7168,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,12288,0.033952001482248306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,12288,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,16384,0.05523199960589409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,16384,0.04134399816393852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,16384,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,12288,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,10240,0.03641600161790848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,10240,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,10240,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,8192,0.030688000842928886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,8192,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,8192,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,7168,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,65536,0.1093439981341362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,7168,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,6144,0.030559999868273735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,7168,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,6144,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,6144,0.02377600036561489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,5120,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,5120,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,5120,0.02035200037062168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,4096,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,4096,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,65536,0.15455999970436096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,3584,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,4096,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,3584,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,3072,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,3072,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,65536,0.16889600455760956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,2560,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,2560,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,2560,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,1536,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,2048,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,1536,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,1536,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,1024,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,768,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,1024,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,768,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,512,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,256,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,512,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,64,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,6144,32,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,6144,32,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,6144,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,12288,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,12288,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,16384,0.03683200106024742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,16384,0.047488000243902206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,16384,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,10240,0.0307839997112751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,12288,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,10240,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,8192,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,10240,0.03110400028526783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,8192,0.025119999423623085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,7168,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,8192,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,65536,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,7168,0.023903999477624893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,65536,0.144896000623703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,7168,0.024064000695943832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,6144,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,6144,0.022207999601960182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,5120,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,6144,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,5120,0.020640000700950623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,4096,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,4096,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,5120,0.02505600079894066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,4096,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,65536,0.14079999923706055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,3584,0.022143999114632607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,3584,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,3072,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,3072,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,3584,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,3072,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,2560,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,2560,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,2048,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,2048,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,1536,0.016287999227643013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,2048,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,1024,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,1024,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,768,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,128,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,256,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,256,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,2560,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,5120,32,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,5120,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,5120,32,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,12288,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,12288,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,16384,0.04303999990224838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,16384,0.03235200047492981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,16384,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,12288,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,10240,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,10240,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,10240,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,8192,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,8192,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,8192,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,7168,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,7168,0.021983999758958817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,65536,0.08134400099515915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,65536,0.12060800194740295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,7168,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,6144,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,6144,0.026784000918269157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,5120,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,4096,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,5120,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,4096,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,3584,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,4096,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,65536,0.12697599828243256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,3584,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,3584,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,3072,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,3072,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,3072,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,2560,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,2560,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,1536,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,2048,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,2048,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,1536,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,1024,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,1024,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,512,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,512,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,512,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,256,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,256,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,256,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,128,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,64,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,4096,32,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,4096,32,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,4096,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,12288,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,16384,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,16384,0.030432000756263733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,16384,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,12288,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,10240,0.029503999277949333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,10240,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,10240,0.0289280004799366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,12288,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,8192,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,8192,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,65536,0.10758399963378906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,8192,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,65536,0.0716480016708374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,7168,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,7168,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,6144,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,6144,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,7168,0.03030399978160858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,5120,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,6144,0.021215999498963356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,5120,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,4096,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,4096,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,65536,0.1231359988451004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,3584,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,4096,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,3584,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,3072,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,3072,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,3584,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,3072,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,2560,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,2048,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,1536,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,2048,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,1536,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,1024,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,1024,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,1536,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,768,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,512,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,768,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,512,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,512,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,256,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,256,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,128,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,128,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,128,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,64,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,64,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,64,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3584,32,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3584,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3584,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,12288,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,16384,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,16384,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,16384,0.03440000116825104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,12288,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,10240,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,10240,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,10240,0.04102399945259094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,8192,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,8192,0.01926399953663349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,8192,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,7168,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,7168,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,65536,0.08982399851083755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,65536,0.06521599739789963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,6144,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,7168,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,6144,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,5120,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,6144,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,5120,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,5120,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,4096,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,4096,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,4096,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,65536,0.11699199676513672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,3584,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,3584,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,3072,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,3072,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,2560,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,3072,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,2560,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,2048,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,2048,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,2560,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,2048,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,1536,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,1024,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,1024,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,1024,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,768,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,512,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,512,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,512,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,256,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,256,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,128,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,64,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,64,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,3072,32,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,3072,32,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,3072,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,12288,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,16384,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,16384,0.028384000062942505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,12288,0.023871999233961105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,16384,0.034591998904943466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,12288,0.029823999851942062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,10240,0.026847999542951584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,10240,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,8192,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,10240,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,8192,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,65536,0.05724800005555153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,7168,0.01775999926030636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,65536,0.08012799918651581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,8192,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,7168,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,7168,0.029664000496268272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,6144,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,6144,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,5120,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,6144,0.025631999596953392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,5120,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,5120,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,4096,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,4096,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,65536,0.11414399743080139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,4096,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,3584,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,3584,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,3584,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,3072,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,3072,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,2560,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,3072,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,2560,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,2048,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,2560,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,2048,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,1536,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,1536,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,1536,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,1024,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,1024,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,768,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,768,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,512,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,128,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,256,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,128,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,64,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2560,32,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2560,32,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2560,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,12288,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,12288,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,16384,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,16384,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,16384,0.03488000109791756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,12288,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,10240,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,10240,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,8192,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,10240,0.029759999364614487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,8192,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,65536,0.051552001386880875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,8192,0.02425600029528141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,7168,0.014112000353634357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,7168,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,65536,0.07081600278615952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,6144,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,6144,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,5120,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,5120,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,6144,0.025248000398278236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,5120,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,4096,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,4096,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,65536,0.11267200112342834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,3584,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,3584,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,4096,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,3584,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,3072,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,3072,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,3072,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,7168,0.02271999977529049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,2560,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,2560,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,2560,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,2048,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,2048,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,2048,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,1536,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,1536,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,1024,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,768,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,1024,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,768,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,512,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,512,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,256,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,128,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,2048,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,2048,32,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,2048,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,12288,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,12288,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,16384,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,16384,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,16384,0.05443200096487999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,12288,0.03376000002026558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,10240,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,10240,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,10240,0.03868800029158592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,8192,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,8192,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,8192,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,7168,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,65536,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,65536,0.059967998415231705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,7168,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,6144,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,7168,0.02876799926161766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,6144,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,5120,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,5120,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,6144,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,5120,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,4096,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,4096,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,65536,0.11161600053310394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,4096,0.018719999119639397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,3584,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,3584,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,3584,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,3072,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,3072,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,3072,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,2560,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,2560,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,2560,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,2048,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,2048,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,1536,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,2048,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,1536,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,1536,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,1024,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,768,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,1024,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,512,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,512,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,512,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1536,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1536,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,12288,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,16384,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,16384,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,12288,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,16384,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1536,64,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,12288,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,10240,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,10240,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,8192,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,10240,0.038336001336574554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,65536,0.051231998950242996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,8192,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,65536,0.04665600135922432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,7168,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,8192,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,7168,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,7168,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,6144,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,6144,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,5120,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,6144,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,5120,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,5120,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,4096,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,65536,0.11151999980211258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,4096,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,4096,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,3584,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,3584,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,3584,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,3072,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,3072,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,3072,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,2560,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,2560,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,2048,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,2560,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,2048,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,1536,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,2048,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,1536,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,1536,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,1024,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,1024,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,1024,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,768,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,768,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,768,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,512,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,512,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,256,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,64,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,64,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,1024,32,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,1024,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,1024,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,12288,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,12288,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,16384,0.018751999363303185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,16384,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,16384,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,12288,0.032607998698949814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,10240,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,10240,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,10240,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,8192,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,8192,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,65536,0.049247998744249344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,7168,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,8192,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,65536,0.04438399896025658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,7168,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,6144,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,7168,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,6144,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,5120,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,6144,0.019328000023961067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,5120,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,65536,0.11078400164842606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,5120,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,4096,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,4096,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,3584,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,4096,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,3584,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,3072,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,3584,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,2560,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,3072,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,2560,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,2560,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,2048,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,2048,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,1536,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,1536,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,2048,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,1024,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,1536,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,768,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,1024,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,512,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,512,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,3072,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,128,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,64,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,768,32,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,768,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,768,32,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,12288,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,12288,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,16384,0.01942400075495243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,16384,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,16384,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,12288,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,10240,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,10240,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,10240,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,8192,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,8192,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,65536,0.05503999814391136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,65536,0.04499199986457825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,7168,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,8192,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,7168,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,7168,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,6144,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,6144,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,5120,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,5120,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,6144,0.025087999179959297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,5120,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,65536,0.11100800335407257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,4096,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,4096,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,3584,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,4096,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,3584,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,3584,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,3072,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,3072,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,2560,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,3072,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,2560,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,2560,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,2048,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,2048,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,1536,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,1536,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,1024,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,1536,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,768,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,1024,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,768,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,768,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,512,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,64,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,512,32,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,512,32,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,512,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,12288,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,16384,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,16384,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,16384,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,12288,0.0326399989426136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,10240,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,10240,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,12288,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,8192,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,10240,0.03791999816894531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,8192,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,65536,0.050144001841545105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,7168,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,7168,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,65536,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,8192,0.03094400092959404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,7168,0.021824000403285027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,6144,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,6144,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,5120,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,5120,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,6144,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,5120,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,4096,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,65536,0.111455999314785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,4096,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,3584,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,4096,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,3584,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,3072,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,3072,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,3072,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,3584,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,2560,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,2560,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,2048,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,2048,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,2560,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,2048,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,1536,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,1536,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,1536,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,1024,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,1024,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,768,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,768,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,512,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,256,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,128,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,128,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,64,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,256,32,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,256,32,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,256,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,12288,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,12288,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,16384,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,16384,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,16384,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,12288,0.03359999880194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,10240,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,10240,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,10240,0.03840000182390213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,8192,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,8192,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,65536,0.05084799975156784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,8192,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,7168,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,65536,0.04041599854826927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,7168,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,6144,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,7168,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,6144,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,5120,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,6144,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,5120,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,5120,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,65536,0.13808000087738037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,4096,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,4096,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,4096,0.019168000668287277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,3584,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,3584,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,3584,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,3072,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,3072,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,3072,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,2560,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,2560,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,2560,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,2048,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,2048,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,2048,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,1536,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,1536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,1536,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,1024,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,1024,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,768,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,256,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,256,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,128,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,128,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,128,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,128,1024,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,12288,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,16384,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,12288,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,16384,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,16384,0.039712000638246536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,12288,0.03187200054526329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,10240,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,10240,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,8192,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,10240,0.028543999418616295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,8192,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,65536,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,7168,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,65536,0.04259200021624565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,8192,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,7168,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,6144,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,7168,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,6144,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,5120,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,5120,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,6144,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,5120,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,65536,0.138047993183136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,4096,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,4096,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,4096,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,3584,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,3584,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,3584,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,2560,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,3072,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,3072,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,2560,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,2048,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,2560,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,2048,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,1536,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,1536,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,2048,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,1536,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,1024,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,1024,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,1024,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,768,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,768,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,768,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,512,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,128,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,64,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,64,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,64,32,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,64,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,12288,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,12288,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,16384,0.019200000911951065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,16384,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,16384,0.033055998384952545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,12288,0.0315839983522892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,10240,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,10240,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,8192,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,10240,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,8192,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,65536,0.05353600159287453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,65536,0.043007999658584595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,7168,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,7168,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,6144,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,7168,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,6144,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,65536,0.13318400084972382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,5120,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,6144,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,5120,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,4096,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,5120,0.01648000068962574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,4096,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,3584,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,4096,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,3584,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,3584,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,3072,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,3072,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,8192,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,3072,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,2560,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,2560,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,2560,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,2048,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,2048,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,2048,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,1536,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,1024,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,1536,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,1536,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,1024,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,768,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,1024,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,768,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,512,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,768,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,512,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,128,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,8,32,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,8,32,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,8,32,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,12288,0.1908160001039505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,10240,0.1619199961423874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,16384,0.25308799743652344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,12288,0.2744640111923218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,10240,0.22652800381183624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,12288,0.2635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,16384,0.3800320029258728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,8192,0.13315199315547943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,8192,0.18095999956130981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,16384,0.3481599986553192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,10240,0.22070400416851044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,7168,0.162432000041008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,7168,0.11820799857378006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,8192,0.17747199535369873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,6144,0.13894400000572205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,6144,0.10304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,7168,0.15939199924468994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,5120,0.08755200356245041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,5120,0.12041600048542023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,4096,0.07196799665689468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,4096,0.09670399874448776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,6144,0.13631999492645264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,3584,0.0655680000782013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,5120,0.11606399714946747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,4096,0.09328000247478485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,3584,0.08681599795818329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,3072,0.08144000172615051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,3072,0.057440001517534256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,2560,0.04934399947524071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,3584,0.08432000130414963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,2560,0.07011199742555618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,2048,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,3072,0.07574400305747986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,2560,0.07129599899053574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,2048,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,1536,0.04614400118589401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,1536,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,2048,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,1024,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,1024,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,1536,0.04800000041723251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,768,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,1024,0.04323200136423111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,768,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,512,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,512,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,768,0.04195199906826019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,256,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,512,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,256,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,256,0.040511999279260635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,128,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,128,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,64,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,64,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,65536,32,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,64,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,32,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,65536,32,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,12288,0.07798399776220322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,16384,0.07177600264549255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,65536,128,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,16384,0.10035199671983719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,12288,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,16384,0.09337600320577621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,10240,0.06646399945020676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,12288,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,10240,0.04819199815392494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,8192,0.05532800033688545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,8192,0.04214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,10240,0.060736000537872314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,7168,0.0514880008995533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,8192,0.049855999648571014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,65536,0.2532159984111786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,7168,0.036928001791238785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,6144,0.04403200000524521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,7168,0.04604800045490265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,6144,0.03385600075125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,5120,0.038784001022577286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,5120,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,6144,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,5120,0.035679999738931656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,4096,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,65536,0.3850240111351013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,4096,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,3584,0.02985600009560585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,4096,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,3584,0.023615999147295952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,3072,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,3584,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,3072,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,3072,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,2560,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,65536,0.39129599928855896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,2560,0.02672000043094158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,2560,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,2048,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,2048,0.022304000332951546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,2048,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,1536,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,1536,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,1536,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,1024,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,768,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,1024,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,768,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,512,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,768,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,512,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,512,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,256,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,256,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,256,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,128,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,64,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,128,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,128,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,16384,32,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,64,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,16384,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,16384,32,0.014976000413298607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,12288,0.06128000095486641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,12288,0.04684799909591675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,16384,0.059328000992536545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,16384,0.07811199873685837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,16384,0.0729919970035553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,12288,0.05641600117087364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,10240,0.05142400041222572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,10240,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,10240,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,8192,0.042527999728918076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,8192,0.03436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,7168,0.039264000952243805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,8192,0.039583999663591385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,7168,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,65536,0.19596800208091736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,7168,0.03702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,6144,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,6144,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,5120,0.027936000376939774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,5120,0.03014400042593479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,6144,0.031968001276254654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,65536,0.2925119996070862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,4096,0.026208000257611275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,5120,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,4096,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,3584,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,3584,0.02191999927163124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,3072,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,3584,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,3072,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,65536,0.3051519989967346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,2560,0.01881599985063076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,3072,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,2560,0.021056000143289566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,2048,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,2560,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,2048,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,2048,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,1536,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,1536,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,1024,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,1024,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,1024,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,768,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,4096,0.023744000121951103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,768,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,512,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,512,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,768,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,512,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,256,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,128,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,128,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,128,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,64,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,64,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,12288,32,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,64,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,12288,32,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,12288,32,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,12288,0.054368000477552414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,12288,0.04054399952292442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,16384,0.050592001527547836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,16384,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,16384,0.061983998864889145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,12288,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,10240,0.04566400125622749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,10240,0.038015998899936676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,10240,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,8192,0.03798399865627289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,8192,0.03033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,7168,0.034752000123262405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,8192,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,7168,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,65536,0.16838400065898895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,6144,0.031007999554276466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,7168,0.03280000016093254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,6144,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,65536,0.25577598810195923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,6144,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,5120,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,5120,0.02816000021994114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,4096,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,5120,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,4096,0.019487999379634857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,3584,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,3584,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,4096,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,3584,0.01974399946630001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,3072,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,3072,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,3072,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,65536,0.25891199707984924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,2560,0.019231999292969704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,2560,0.017472000792622566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,2048,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,2560,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,2048,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,1536,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,1536,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,1024,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,1024,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,768,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,1024,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,768,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,512,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,768,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,512,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,256,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,128,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,256,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,128,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,64,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,64,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,64,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,10240,32,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,10240,32,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,10240,32,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,12288,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,16384,0.04185599833726883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,16384,0.06745599955320358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,16384,0.05152000114321709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,12288,0.04095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,10240,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,10240,0.0379519984126091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,12288,0.04419200122356415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,10240,0.03484800085425377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,8192,0.03267199918627739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,8192,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,7168,0.030912000685930252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,8192,0.02860799990594387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,65536,0.13500800728797913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,7168,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,7168,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,6144,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,6144,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,65536,0.19551999866962433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,5120,0.032416000962257385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,5120,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,6144,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,4096,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,5120,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,4096,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,3584,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,4096,0.01865600049495697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,65536,0.20287999510765076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,3584,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,3584,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,3072,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,3072,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,3072,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,2560,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,2560,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,2048,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,2048,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,2560,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,1536,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,1536,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,1536,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,1024,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,1024,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,768,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,768,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,512,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,256,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,256,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,512,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,256,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,128,0.00940799992531538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,128,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,64,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,64,0.013055999763309956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,64,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,8192,32,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,8192,32,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,8192,32,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,12288,0.04105599969625473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,12288,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,16384,0.03855999931693077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,16384,0.05087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,16384,0.046751998364925385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,10240,0.03446400165557861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,12288,0.03711999952793121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,10240,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,10240,0.032896000891923904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,8192,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,8192,0.023679999634623528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,7168,0.026048000901937485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,8192,0.026623999699950218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,65536,0.1233920007944107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,7168,0.022624000906944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,65536,0.1777919977903366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,7168,0.025599999353289604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,6144,0.024191999807953835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,6144,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,5120,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,5120,0.021663999184966087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,6144,0.021695999428629875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,4096,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,5120,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,4096,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,3584,0.01548799965530634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,4096,0.01772800087928772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,3584,0.017503999173641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,3584,0.015744000673294067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,65536,0.19126400351524353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,3072,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,3072,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,2560,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,2560,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,3072,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,2560,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,2048,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,2048,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,1536,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,2048,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,1536,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,1024,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,768,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,512,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,768,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,512,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,256,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,256,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,128,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,128,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,128,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,64,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,64,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,64,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,32,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,7168,32,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,7168,32,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,7168,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,12288,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,12288,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,16384,0.034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,16384,0.04694399982690811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,16384,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,12288,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,10240,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,10240,0.032127998769283295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,8192,0.028736000880599022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,10240,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,8192,0.02115200087428093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,7168,0.02521600015461445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,65536,0.10364799946546555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,8192,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,7168,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,65536,0.163455992937088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,6144,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,7168,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,6144,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,6144,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,5120,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,5120,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,4096,0.017791999503970146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,4096,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,5120,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,4096,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,3584,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,3584,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,3584,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,3072,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,3072,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,65536,0.14899200201034546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,2560,0.01571200042963028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,3072,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,2048,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,2560,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,2560,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,2048,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,1536,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,2048,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,1536,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,1536,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,1024,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,1024,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,768,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,1024,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,768,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,768,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,512,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,256,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,256,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,256,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,64,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,64,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,6144,32,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,6144,32,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,6144,32,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,12288,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,12288,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,16384,0.029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,16384,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,16384,0.03590400144457817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,12288,0.029023999348282814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,10240,0.023072000592947006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,10240,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,8192,0.024320000782608986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,8192,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,7168,0.022816000506281853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,65536,0.08912000060081482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,8192,0.020927999168634415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,65536,0.146464005112648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,7168,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,7168,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,6144,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,5120,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,6144,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,5120,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,6144,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,10240,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,5120,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,4096,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,4096,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,3584,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,4096,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,65536,0.14428800344467163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,3584,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,3584,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,3072,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,3072,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,2560,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,2560,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,3072,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,2560,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,2048,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,2048,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,2048,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,1536,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,1024,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,1024,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,1024,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,1536,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,768,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,768,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,256,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,512,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,256,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,256,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,128,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,128,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,64,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,64,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,64,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,5120,32,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,5120,32,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,5120,32,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,12288,0.02489599958062172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,12288,0.023231999948620796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,16384,0.030208000913262367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,16384,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,16384,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,12288,0.02300800010561943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,10240,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,10240,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,10240,0.02112000063061714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,8192,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,8192,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,7168,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,8192,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,65536,0.07827199995517731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,6144,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,7168,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,65536,0.10022400319576263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,7168,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,6144,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,6144,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,5120,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,5120,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,4096,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,4096,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,5120,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,65536,0.1058880016207695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,3584,0.012543999589979649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,4096,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,3584,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,3072,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,3072,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,3584,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,3072,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,2560,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,2560,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,2560,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,2048,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,2048,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,1024,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,2048,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,1536,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,1024,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,768,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,768,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,512,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,512,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,768,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,512,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,256,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,128,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,128,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,64,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,4096,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,64,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,4096,32,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,32,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,12288,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,16384,0.026399999856948853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,16384,0.02831999957561493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,4096,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,16384,0.02723200060427189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,12288,0.021503999829292297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,10240,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,12288,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,10240,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,8192,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,10240,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,8192,0.01664000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,7168,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,65536,0.07228799909353256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,8192,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,7168,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,65536,0.09321600198745728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,7168,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,6144,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,6144,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,5120,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,5120,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,6144,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,5120,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,4096,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,4096,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,65536,0.09478399902582169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,4096,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,3584,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,3584,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,3072,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,3584,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,3072,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,3072,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,2560,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,2560,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,2560,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,2048,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,1536,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,1536,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,2048,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,1024,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,1024,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,1536,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,1024,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,768,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,768,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,512,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,768,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,512,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,512,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,64,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,64,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3584,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3584,32,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3584,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,12288,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,12288,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,16384,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,16384,0.024927999824285507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,16384,0.024480000138282776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,12288,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,10240,0.018624000251293182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,10240,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,10240,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,8192,0.016736000776290894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,8192,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,7168,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,8192,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,65536,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,7168,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,65536,0.08339200168848038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,7168,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,6144,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,6144,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,5120,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,6144,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,5120,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,5120,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,4096,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,4096,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,65536,0.08934400230646133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,3584,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,3584,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,3584,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,3072,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,3072,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,3072,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,2560,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,2560,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,2560,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,2048,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,2048,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,2048,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,1536,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,1536,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,1024,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,1536,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,1024,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,4096,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,1024,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,768,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,768,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,768,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,512,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,512,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,128,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,256,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,128,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,3072,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,3072,32,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,3072,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,12288,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,12288,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,16384,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,16384,0.021247999742627144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,16384,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,12288,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,10240,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,10240,0.01651199907064438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,10240,0.016831999644637108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,8192,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,8192,0.014015999622642994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,8192,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,7168,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,65536,0.05734400078654289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,65536,0.0769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,7168,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,6144,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,6144,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,7168,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,6144,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,5120,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,5120,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,5120,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,4096,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,65536,0.08265600353479385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,4096,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,4096,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,3584,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,3584,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,3584,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,3072,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,3072,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,2560,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,3072,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,2560,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,2560,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,2048,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,2048,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,1536,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,2048,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,1536,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,1536,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,1024,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,1024,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,768,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,1024,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,512,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,768,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,256,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,768,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,512,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,512,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,256,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,64,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2560,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,64,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2560,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2560,32,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,12288,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,12288,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,16384,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,16384,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,12288,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,10240,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,10240,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,10240,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,8192,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,8192,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,16384,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,65536,0.04473600164055824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,8192,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,65536,0.06943999975919724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,7168,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,7168,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,6144,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,7168,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,5120,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,6144,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,5120,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,65536,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,5120,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,3584,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,4096,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,4096,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,4096,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,3584,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,3584,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,3072,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,2560,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,2560,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,3072,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,2048,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,2048,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,2048,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,1536,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,1536,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,1024,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,1536,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,1024,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,1024,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,768,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,768,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,512,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,512,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,512,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,256,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,256,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,128,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,64,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,2048,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,2048,32,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,2048,32,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,12288,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,12288,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,16384,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,16384,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,16384,0.01484800036996603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,12288,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,10240,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,10240,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,10240,0.011552000418305397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,8192,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,8192,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,8192,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,7168,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,65536,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,65536,0.058240000158548355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,7168,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,7168,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,5120,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,6144,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,6144,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,5120,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,5120,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,4096,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,65536,0.04540799930691719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,4096,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,4096,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,3584,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,3584,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,3072,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,3584,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,3072,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,3072,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,2560,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,2560,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,2560,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,2048,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,2048,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,1536,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,1536,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,1024,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,1536,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,1024,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,1024,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,768,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,768,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,768,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,512,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,256,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,256,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,128,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,128,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1536,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,64,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1536,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,32,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1536,2048,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,12288,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,12288,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,16384,0.016863999888300896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,16384,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,16384,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,12288,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,10240,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,10240,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,10240,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,8192,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,8192,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,7168,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,65536,0.037376001477241516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,65536,0.05135999992489815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,7168,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,8192,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,6144,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,7168,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,6144,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,6144,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,5120,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,5120,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,5120,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,4096,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,65536,0.0350399985909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,4096,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,3584,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,3584,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,3072,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,3072,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,4096,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,3584,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,3072,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,2560,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,2560,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,2560,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,2048,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,1536,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,1536,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,1024,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,1536,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,1024,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,1024,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,768,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,768,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,768,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,512,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,256,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,256,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,256,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,64,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,128,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,1024,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,1024,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,64,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,1024,32,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,12288,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,16384,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,16384,0.01679999940097332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,16384,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,12288,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,10240,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,10240,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,8192,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,10240,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,65536,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,8192,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,65536,0.05049600079655647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,12288,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,8192,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,7168,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,7168,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,6144,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,6144,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,5120,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,7168,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,6144,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,5120,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,65536,0.03049599938094616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,5120,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,4096,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,3584,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,4096,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,3584,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,4096,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,3072,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,3584,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,3072,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,2560,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,2560,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,2048,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,2560,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,2048,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,1536,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,2048,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,1536,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,1024,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,1536,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,1024,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,768,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,768,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,512,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,512,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,512,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,256,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,128,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,768,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,64,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,768,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,768,32,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,12288,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,12288,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,16384,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,16384,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,16384,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,12288,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,10240,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,10240,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,10240,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,8192,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,8192,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,65536,0.04310400038957596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,7168,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,8192,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,65536,0.03574400022625923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,7168,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,7168,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,6144,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,6144,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,5120,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,5120,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,6144,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,4096,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,5120,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,4096,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,3584,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,4096,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,65536,0.028224000707268715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,3584,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,3072,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,3072,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,3584,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,3072,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,2560,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,2560,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,2048,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,2560,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,1536,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,2048,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,2048,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,1536,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,1536,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,1024,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,768,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,1024,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,768,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,768,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,512,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,512,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,128,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,256,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,128,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,64,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,512,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,64,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,32,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,512,32,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,12288,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,512,256,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,16384,0.015263999812304974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,16384,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,16384,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,12288,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,12288,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,10240,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,10240,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,8192,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,10240,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,8192,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,65536,0.04243199899792671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,7168,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,65536,0.03558399900794029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,8192,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,7168,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,6144,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,7168,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,6144,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,5120,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,5120,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,6144,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,5120,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,65536,0.02687999978661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,4096,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,4096,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,4096,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,3584,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,3584,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,3072,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,3584,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,3072,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,3072,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,2048,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,2560,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,2560,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,1536,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,2048,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,1536,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,1536,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,1024,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,768,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,1024,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,768,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,768,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,256,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,512,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,256,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,128,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,64,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,256,32,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,256,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,256,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,12288,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,12288,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,16384,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,16384,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,16384,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,12288,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,10240,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,10240,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,10240,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,8192,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,8192,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,7168,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,65536,0.034432001411914825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,65536,0.04064000025391579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,7168,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,8192,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,7168,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,6144,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,5120,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,6144,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,5120,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,5120,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,4096,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,65536,0.026815999299287796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,4096,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,3584,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,3584,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,4096,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,3072,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,3072,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,3584,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,3072,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,2560,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,2560,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,2048,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,2560,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,2048,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,6144,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,2048,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,1536,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,1536,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,1024,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,1536,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,768,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,1024,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,1024,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,768,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,512,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,512,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,256,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,512,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,256,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,128,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,128,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,64,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,64,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,128,32,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,128,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,128,32,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,12288,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,12288,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,16384,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,16384,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,16384,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,12288,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,10240,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,10240,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,10240,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,8192,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,8192,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,7168,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,8192,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,65536,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,65536,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,7168,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,6144,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,7168,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,6144,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,6144,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,5120,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,5120,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,5120,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,4096,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,65536,0.026559999212622643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,4096,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,3584,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,4096,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,3584,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,3584,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,3072,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,3072,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,3072,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,2560,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,2560,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,2048,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,2560,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,2048,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,2048,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,1536,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,1536,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,1536,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,1024,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,1024,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,768,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,1024,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,768,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,768,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,512,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,512,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,256,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,256,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,256,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,128,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,128,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,64,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,64,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,64,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,64,32,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,12288,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,16384,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,16384,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,12288,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,16384,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,64,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,12288,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,10240,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,8192,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,10240,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,10240,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,65536,0.03411199897527695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,65536,0.04108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,8192,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,8192,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,7168,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,7168,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,6144,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,7168,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,6144,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,5120,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,65536,0.02643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,6144,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,5120,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,5120,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,4096,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,4096,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,3584,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,3584,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,4096,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,3072,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,3584,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,3072,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,3072,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,2560,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,2560,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,2048,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,2560,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,2048,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,1536,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,2048,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,1536,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,1536,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,1024,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,1024,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,768,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,768,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,512,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,256,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,512,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,128,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,128,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,256,0.005760000087320805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,128,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,64,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,64,0.005760000087320805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,4,32,32,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,4,32,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,4,32,32,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,12288,0.14668799936771393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,10240,0.1284160017967224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,16384,0.18831999599933624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,12288,0.26524800062179565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,10240,0.22278399765491486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,12288,0.26236799359321594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,16384,0.36371201276779175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,8192,0.100832000374794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,8192,0.17868800461292267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,7168,0.15891200304031372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,7168,0.09142400324344635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,10240,0.21932800114154816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,16384,0.3468799889087677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,8192,0.17609600722789764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,6144,0.13606399297714233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,6144,0.08435200154781342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,5120,0.07676800340414047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,7168,0.15705600380897522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,5120,0.11654400080442429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,6144,0.1335040032863617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,4096,0.09334400296211243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,4096,0.057792000472545624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,5120,0.11456000059843063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,4096,0.09219200164079666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,3584,0.05443200096487999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,3584,0.08278399705886841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,3072,0.049536000937223434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,3072,0.07369600236415863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,3584,0.08262400329113007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,2560,0.047520000487565994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,2048,0.051711998879909515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,3072,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,2048,0.04182400181889534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,2560,0.07004799693822861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,1536,0.03519999980926514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,1536,0.04291199892759323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,2048,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,1024,0.027871999889612198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,1024,0.0323840007185936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,1536,0.044096000492572784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,1024,0.038656000047922134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,768,0.02284800074994564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,768,0.028063999488949776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,2560,0.066880002617836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,768,0.03759999945759773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,512,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,512,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,512,0.03593600168824196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,256,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,256,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,128,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,256,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,128,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,128,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,64,0.01603199914097786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,64,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,64,0.034912001341581345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,65536,32,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,65536,32,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,65536,32,0.035232000052928925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,12288,0.0432640016078949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,12288,0.07334399968385696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,16384,0.05350400134921074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,16384,0.09455999732017517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,16384,0.09216000139713287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,12288,0.0708480030298233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,10240,0.06294400244951248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,10240,0.03814399987459183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,10240,0.06019200012087822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,8192,0.05052800104022026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,8192,0.03248000144958496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,7168,0.045343998819589615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,8192,0.04960000142455101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,7168,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,65536,0.1879359930753708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,7168,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,6144,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,6144,0.04032000154256821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,6144,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,5120,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,5120,0.03455999866127968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,4096,0.02953599952161312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,5120,0.03542400151491165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,4096,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,65536,0.35526400804519653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,3584,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,3584,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,4096,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,3072,0.024960000067949295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,3584,0.02579200081527233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,3072,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,2560,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,2560,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,3072,0.024159999564290047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,2560,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,2048,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,2048,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,2048,0.01788800023496151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,1536,0.014944000169634819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,1536,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,1536,0.01616000011563301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,65536,0.35369598865509033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,1024,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,1024,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,1024,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,768,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,768,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,512,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,512,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,768,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,512,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,256,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,256,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,256,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,128,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,64,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,64,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,128,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,64,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,16384,32,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,16384,32,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,16384,32,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,16384,0.04224000126123428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,12288,0.05728000029921532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,16384,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,16384,0.07254400104284286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,10240,0.03081599995493889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,10240,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,12288,0.05628800019621849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,12288,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,10240,0.04761600121855736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,8192,0.03939199820160866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,8192,0.02659199945628643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,65536,0.14339199662208557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,8192,0.03888000175356865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,7168,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,7168,0.024000000208616257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,6144,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,6144,0.032255999743938446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,7168,0.0363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,5120,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,65536,0.2710399925708771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,6144,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,5120,0.020767999812960625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,4096,0.02393599972128868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,5120,0.028031999245285988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,4096,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,3584,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,4096,0.023296000435948372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,65536,0.27164798974990845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,3584,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,3584,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,3072,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,3072,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,2560,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,2560,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,3072,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,2048,0.015424000099301338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,2048,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,2560,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,1536,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,2048,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,1536,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,1536,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,1024,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,768,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,1024,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,1024,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,768,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,512,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,768,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,512,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,512,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,256,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,256,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,128,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,64,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,128,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,64,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,64,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,12288,32,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,12288,32,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,12288,32,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,12288,0.0490880012512207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,12288,0.03174399957060814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,16384,0.06169600039720535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,16384,0.039744000881910324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,16384,0.06108799949288368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,12288,0.04755200073122978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,10240,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,10240,0.029055999591946602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,10240,0.04070400074124336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,8192,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,8192,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,7168,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,8192,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,7168,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,65536,0.1281599998474121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,7168,0.031808000057935715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,6144,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,6144,0.027904000133275986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,65536,0.22511999309062958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,5120,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,6144,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,5120,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,4096,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,5120,0.02486399933695793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,4096,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,3584,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,3584,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,3072,0.017855999991297722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,3072,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,4096,0.020576000213623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,3584,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,3072,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,65536,0.22553600370883942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,2560,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,2048,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,2560,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,1536,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,2560,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,1536,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,2048,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,1024,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,1024,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,1536,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,1024,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,768,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,768,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,512,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,128,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,512,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,256,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,256,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,128,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,64,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,64,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,64,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,10240,32,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,32,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,10240,32,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,10240,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,12288,0.04217600077390671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,16384,0.03161599859595299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,16384,0.05471999943256378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,16384,0.05023999884724617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,12288,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,12288,0.03996799886226654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,10240,0.036320000886917114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,10240,0.022752000018954277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,8192,0.029632000252604485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,10240,0.03500799834728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,8192,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,8192,0.02864000014960766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,7168,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,65536,0.09811200201511383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,7168,0.01820800080895424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,7168,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,6144,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,65536,0.1952960044145584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,6144,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,5120,0.021856000646948814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,6144,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,5120,0.01635199971497059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,4096,0.018271999433636665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,5120,0.020959999412298203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,3584,0.0180479995906353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,4096,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,3584,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,4096,0.018303999677300453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,65536,0.18172800540924072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,3584,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,3072,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,2560,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,3072,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,2560,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,3072,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,2048,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,2560,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,2048,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,1536,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,2048,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,1024,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,1024,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,1024,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,768,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,768,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,512,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,256,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,512,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,768,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,256,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,128,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,64,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,128,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,64,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,128,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,64,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,8192,32,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,8192,32,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,8192,32,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,12288,0.03872000053524971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,12288,0.02473600022494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,16384,0.028863999992609024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,16384,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,16384,0.04585599899291992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,10240,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,12288,0.03721600025892258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,10240,0.021888000890612602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,10240,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,8192,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,8192,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,8192,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,7168,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,7168,0.017952000722289085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,65536,0.09203200042247772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,6144,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,65536,0.18201600015163422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,6144,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,5120,0.02054399996995926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,6144,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,5120,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,5120,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,4096,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,4096,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,3584,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,3584,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,4096,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,65536,0.16793599724769592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,3584,0.015231999568641186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,3072,0.015776000916957855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,3072,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,7168,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,2560,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,2560,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,3072,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,2560,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,2048,0.012128000147640705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,1536,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,2048,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,2048,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,1024,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,1536,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,1024,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,1024,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,768,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,768,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,768,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,512,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,512,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,512,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,256,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,128,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,256,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,128,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,128,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,64,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,64,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,64,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,7168,32,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,7168,32,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,7168,32,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,12288,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,12288,0.02175999991595745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,16384,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,16384,0.04121600091457367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,16384,0.03984000161290169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,12288,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,10240,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,10240,0.021088000386953354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,8192,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,10240,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,8192,0.01724799908697605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,8192,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,7168,0.021568000316619873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,7168,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,65536,0.08406399935483932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,7168,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,6144,0.019392000511288643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,65536,0.14047999680042267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,6144,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,5120,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,5120,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,6144,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,4096,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,5120,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,4096,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,3584,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,3584,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,65536,0.13872000575065613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,4096,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,3584,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,3072,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,3072,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,3072,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,2560,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,2560,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,2048,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,2560,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,2048,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,2048,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,1536,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,1536,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,1536,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,1024,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,1024,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,768,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,1024,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,768,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,768,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,512,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,512,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,512,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,256,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,256,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,128,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,128,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,64,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,64,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,6144,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,6144,32,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,12288,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,16384,0.025151999667286873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,16384,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,16384,0.03465599939227104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,12288,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,6144,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,12288,0.027807999402284622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,10240,0.02454400062561035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,8192,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,10240,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,10240,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,8192,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,65536,0.08140800148248672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,8192,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,7168,0.018848000094294548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,7168,0.015584000386297703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,65536,0.11932799965143204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,6144,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,7168,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,6144,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,6144,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,5120,0.01532800029963255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,5120,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,4096,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,65536,0.12243200093507767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,4096,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,4096,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,5120,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,3584,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,3584,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,3584,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,3072,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,3072,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,2560,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,2560,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,3072,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,2048,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,2560,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,2048,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,1536,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,2048,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,1024,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,1536,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,1536,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,1024,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,1024,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,768,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,768,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,512,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,768,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,512,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,512,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,256,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,256,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,128,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,64,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,128,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,5120,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,5120,32,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,5120,32,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,12288,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,12288,0.016575999557971954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,16384,0.029152000322937965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,16384,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,16384,0.028351999819278717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,10240,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,12288,0.023104000836610794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,10240,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,10240,0.02038400061428547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,8192,0.017983999103307724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,8192,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,7168,0.016896000131964684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,8192,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,65536,0.05446400120854378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,7168,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,65536,0.09600000083446503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,7168,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,6144,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,6144,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,5120,0.014208000153303146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,5120,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,6144,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,4096,0.01235199999064207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,5120,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,4096,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,65536,0.09529600292444229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,3584,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,3584,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,4096,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,3584,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,3072,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,2560,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,2560,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,2048,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,2560,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,2048,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,1536,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,2048,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,1536,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,1024,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,1536,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,1024,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,1024,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,768,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,768,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,512,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,512,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,256,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,256,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,3072,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,4096,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,64,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,4096,64,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,4096,32,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,12288,0.02239999920129776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,12288,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,16384,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,16384,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,16384,0.025855999439954758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,12288,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,10240,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,10240,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,8192,0.016767999157309532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,10240,0.018783999606966972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,8192,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,8192,0.015968000516295433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,7168,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,65536,0.050912000238895416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,7168,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,65536,0.088639996945858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,7168,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,6144,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,6144,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,5120,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,6144,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,5120,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,4096,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,4096,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,5120,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,4096,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,3584,0.011807999573647976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,65536,0.08489599823951721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,3584,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,3072,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,3584,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,3072,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,3072,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,2560,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,2560,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,2048,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,2048,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,2048,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,1536,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,2560,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,1536,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,1024,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,768,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,768,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,1024,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,1024,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,1536,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,768,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,512,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,512,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,256,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,512,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,256,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,256,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3584,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,64,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3584,32,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3584,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,12288,0.02006400004029274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,16384,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,16384,0.024831999093294144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,16384,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,12288,0.019519999623298645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,10240,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,10240,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,8192,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,12288,0.013887999579310417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,10240,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,8192,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,65536,0.04399999976158142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,8192,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,65536,0.07772800326347351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,7168,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,7168,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,7168,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,6144,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,6144,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,5120,0.012927999719977379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,5120,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,6144,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,5120,0.01196799986064434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,4096,0.010944000445306301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,4096,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,65536,0.07654400169849396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,3584,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,4096,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,3584,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,3584,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,3072,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,3072,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,3072,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,2560,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,2560,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,2048,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,2560,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,1536,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,2048,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,1536,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,1024,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,1024,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,768,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,768,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,1536,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,512,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,512,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,256,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,256,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,512,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,128,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,64,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,64,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,3072,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,3072,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,3072,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,12288,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,12288,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,16384,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,16384,0.022336000576615334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,16384,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,12288,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,10240,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,10240,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,8192,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,10240,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,8192,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,8192,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,7168,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,65536,0.06908799707889557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,65536,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,7168,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,7168,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,6144,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,6144,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,5120,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,6144,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,5120,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,5120,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,4096,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,4096,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,3584,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,65536,0.07119999825954437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,4096,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,3584,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,3584,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,3072,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,3072,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,3072,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,2560,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,2560,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,2560,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,2048,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,2048,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,2048,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,1536,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,1536,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,1536,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,1024,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,768,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,512,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,768,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,512,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,256,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,512,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,256,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,256,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,128,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,64,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2560,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2560,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2560,1024,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,12288,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,16384,0.01961600035429001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,16384,0.014399999752640724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,12288,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,16384,0.017696000635623932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,12288,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,10240,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,10240,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,8192,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,10240,0.013504000380635262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,8192,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,65536,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,7168,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,65536,0.06063999980688095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,8192,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,7168,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,7168,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,6144,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,6144,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,5120,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,5120,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,6144,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,5120,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,4096,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,65536,0.051072001457214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,4096,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,4096,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,3584,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,3584,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,3584,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,3072,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,2560,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,3072,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,3072,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,2560,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,2560,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,2048,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,2048,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,2048,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,1536,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,1536,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,1024,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,1536,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,768,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,1024,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,768,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,768,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,512,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,512,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,256,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,512,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,256,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,128,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,128,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,64,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,2048,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,64,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,2048,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,2048,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,12288,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,12288,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,16384,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,16384,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,16384,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,12288,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,10240,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,10240,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,10240,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,8192,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,7168,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,8192,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,65536,0.03276799991726875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,7168,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,65536,0.051392000168561935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,7168,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,6144,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,6144,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,8192,0.013279999606311321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,5120,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,65536,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,6144,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,5120,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,4096,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,5120,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,4096,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,3584,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,3584,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,4096,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,3584,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,3072,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,3072,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,3072,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,2560,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,2560,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,2048,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,2560,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,1536,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,2048,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,1536,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,1536,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,1024,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,1024,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,768,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,1024,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,768,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,512,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,768,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,512,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,512,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,256,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,256,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,128,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,256,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,128,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,64,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1536,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1536,32,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1536,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,12288,0.013632000423967838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,12288,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,16384,0.016672000288963318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,16384,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,16384,0.012384000234305859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,12288,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,10240,0.012480000033974648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,10240,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,10240,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,8192,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,8192,0.009119999594986439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,7168,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,65536,0.046911999583244324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,65536,0.02879999950528145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,8192,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,7168,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,7168,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,6144,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,6144,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,5120,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,5120,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,6144,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,5120,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,65536,0.031488001346588135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,4096,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,4096,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,3584,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,4096,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,3584,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,3584,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,3072,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,3072,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,2560,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,2048,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,2560,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,3072,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,2560,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,2048,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,1536,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,2048,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,1536,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,1024,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,1024,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,1024,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,1536,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,768,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,768,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,768,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,512,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,256,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,512,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,256,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,256,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,64,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,64,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,64,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,128,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,1024,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,1024,32,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,12288,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,16384,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,16384,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,1024,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,16384,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,12288,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,12288,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,10240,0.012032000347971916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,10240,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,8192,0.01104000024497509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,10240,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,65536,0.044895999133586884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,8192,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,65536,0.027615999802947044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,8192,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,7168,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,7168,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,7168,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,6144,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,6144,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,6144,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,5120,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,5120,0.008927999995648861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,65536,0.02783999964594841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,5120,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,4096,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,4096,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,4096,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,3584,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,3584,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,3072,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,3584,0.007360000163316727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,3072,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,3072,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,2560,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,2560,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,2560,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,2048,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,2048,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,2048,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,1536,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,1536,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,1536,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,1024,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,1024,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,1024,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,768,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,768,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,768,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,256,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,512,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,128,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,256,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,256,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,128,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,64,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,64,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,768,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,768,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,768,32,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,12288,0.012799999676644802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,12288,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,16384,0.015072000212967396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,16384,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,16384,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,12288,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,10240,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,10240,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,10240,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,8192,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,8192,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,8192,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,65536,0.04368000105023384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,7168,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,65536,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,7168,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,7168,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,6144,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,6144,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,5120,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,6144,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,5120,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,4096,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,65536,0.023711999878287315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,3584,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,4096,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,5120,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,3584,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,3584,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,3072,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,3072,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,2560,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,3072,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,2560,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,2048,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,2048,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,2048,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,1536,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,1536,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,1536,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,1024,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,1024,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,1024,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,768,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,4096,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,768,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,768,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,512,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,512,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,256,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,256,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,256,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,128,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,128,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,64,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,64,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,512,32,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,512,32,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,512,32,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,12288,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,12288,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,16384,0.014751999638974667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,16384,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,16384,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,12288,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,10240,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,10240,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,10240,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,8192,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,8192,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,8192,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,65536,0.042047999799251556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,65536,0.025728000327944756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,7168,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,7168,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,7168,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,6144,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,5120,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,6144,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,5120,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,6144,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,5120,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,65536,0.02179200015962124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,4096,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,4096,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,3584,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,4096,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,3584,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,3584,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,3072,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,3072,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,3072,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,2560,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,2560,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,2048,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,2048,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,2560,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,2048,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,1536,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,1536,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,1024,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,1536,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,1024,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,768,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,1024,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,768,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,512,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,768,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,512,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,256,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,512,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,256,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,256,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,128,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,128,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,64,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,64,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,256,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,64,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,256,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,256,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,12288,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,12288,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,16384,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,16384,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,12288,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,10240,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,10240,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,8192,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,8192,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,10240,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,65536,0.04153599962592125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,65536,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,8192,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,16384,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,7168,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,7168,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,6144,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,6144,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,7168,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,6144,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,65536,0.020896000787615776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,5120,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,5120,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,5120,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,4096,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,4096,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,4096,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,3584,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,3584,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,3072,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,3584,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,3072,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,2560,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,2560,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,3072,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,2560,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,2048,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,1536,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,2048,0.014431999996304512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,2048,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,1024,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,1536,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,1536,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,1024,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,1024,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,768,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,768,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,512,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,768,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,512,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,256,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,512,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,128,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,256,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,128,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,64,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,128,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,64,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,128,32,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,128,32,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,128,32,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,12288,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,12288,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,16384,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,16384,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,16384,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,12288,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,10240,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,10240,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,10240,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,8192,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,8192,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,8192,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,65536,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,65536,0.04150399938225746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,7168,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,7168,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,7168,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,6144,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,6144,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,5120,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,65536,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,5120,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,6144,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,5120,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,4096,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,4096,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,3584,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,3584,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,4096,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,3072,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,3584,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,3072,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,3072,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,2560,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,2560,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,2048,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,1536,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,2560,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,2048,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,1536,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,1024,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,1536,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,1024,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,768,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,1024,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,768,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,768,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,512,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,512,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,512,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,256,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,256,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,256,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,128,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,128,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,128,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,2048,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,64,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,64,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,64,32,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,64,32,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,64,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,12288,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,12288,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,16384,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,16384,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,16384,0.009247999638319016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,12288,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,10240,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,10240,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,8192,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,10240,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,65536,0.04156799986958504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,8192,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,65536,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,7168,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,8192,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,7168,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,7168,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,6144,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,6144,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,5120,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,6144,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,5120,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,65536,0.020416000857949257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,5120,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,4096,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,4096,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,3584,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,4096,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,3584,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,3072,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,3584,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,3072,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,3072,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,2560,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,2048,0.014336000196635723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,2048,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,2560,0.006496000103652477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,2048,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,1536,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,1536,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,1536,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,1024,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,1024,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,1024,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,768,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,768,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,512,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,768,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,512,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,256,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,512,0.005791999865323305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,128,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,256,0.005760000087320805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,128,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,64,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,64,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,2,32,32,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,64,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,2,32,32,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,2,32,32,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,12288,0.13846400380134583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,16384,0.18086400628089905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,12288,0.26870399713516235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,10240,0.22288000583648682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,12288,0.26070401072502136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,16384,0.35139200091362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,10240,0.12003199756145477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,8192,0.09433600306510925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,8192,0.17791999876499176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,16384,0.3484799861907959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,10240,0.21779200434684753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,7168,0.08521600067615509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,7168,0.15987199544906616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,8192,0.17420800030231476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,6144,0.13526399433612823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,6144,0.07583999633789062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,5120,0.06998399645090103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,5120,0.1157120019197464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,7168,0.15516799688339233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,6144,0.13235199451446533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,4096,0.09225600212812424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,5120,0.11161600053310394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,4096,0.05113599821925163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,3584,0.046879999339580536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,3584,0.08153600245714188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,3072,0.042399998754262924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,4096,0.09087999910116196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,3072,0.0732479989528656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,3584,0.08064000308513641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,2560,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,2560,0.06489600241184235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,3072,0.06995200365781784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,2560,0.06176000088453293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,2048,0.0342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,2048,0.04972799867391586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,1536,0.0395519994199276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,2048,0.049375999718904495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,1536,0.03200000151991844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,1024,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,1536,0.040608000010252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,1024,0.02844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,768,0.02800000086426735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,768,0.028704000636935234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,1024,0.03190400078892708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,512,0.02316799946129322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,512,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,768,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,256,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,512,0.02755199931561947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,256,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,128,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,256,0.026944000273942947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,128,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,128,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,64,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,64,0.01600000075995922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,64,0.027135999873280525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,65536,32,0.016095999628305435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,65536,32,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,65536,32,0.02751999907195568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,12288,0.04124800115823746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,12288,0.07356800138950348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,16384,0.051872000098228455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,16384,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,16384,0.09212800115346909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,10240,0.03580800071358681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,10240,0.06099199876189232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,12288,0.07043199986219406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,10240,0.05984000116586685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,8192,0.05177599936723709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,8192,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,7168,0.04636799916625023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,8192,0.04841599985957146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,7168,0.026688000187277794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,65536,0.18297599256038666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,7168,0.044224001467227936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,6144,0.039872001856565475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,6144,0.024671999737620354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,5120,0.03417599946260452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,5120,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,6144,0.03788800165057182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,65536,0.36265599727630615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,4096,0.028672000393271446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,4096,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,5120,0.03420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,3584,0.01727999933063984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,4096,0.027744000777602196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,3584,0.025760000571608543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,3584,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,3072,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,3072,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,3072,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,2560,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,2560,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,65536,0.355679988861084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,2560,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,2048,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,2048,0.013311999849975109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,1536,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,2048,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,1536,0.012896000407636166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,1024,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,1024,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,1536,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,768,0.011680000461637974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,1024,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,768,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,768,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,512,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,512,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,256,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,256,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,512,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,256,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,128,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,128,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,64,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,64,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,64,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,32,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,16384,32,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,16384,32,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,16384,128,0.01056000031530857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,12288,0.060127999633550644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,16384,0.04227200150489807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,16384,0.07577600330114365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,12288,0.033824000507593155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,16384,0.07091200351715088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,12288,0.054687999188899994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,10240,0.03017600066959858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,10240,0.0498879998922348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,8192,0.041760001331567764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,10240,0.04623999819159508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,8192,0.024607999250292778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,7168,0.037696000188589096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,65536,0.14975999295711517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,8192,0.03824000060558319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,7168,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,7168,0.03551999852061272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,65536,0.28537601232528687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,6144,0.03251200169324875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,6144,0.02099199965596199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,5120,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,5120,0.02067199908196926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,6144,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,4096,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,5120,0.026976000517606735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,4096,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,4096,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,3584,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,3584,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,3584,0.021023999899625778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,65536,0.26627200841903687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,3072,0.020191999152302742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,3072,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,3072,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,2560,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,2560,0.01897599920630455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,2560,0.017376000061631203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,2048,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,2048,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,2048,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,1536,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,1536,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,1536,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,1024,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,1024,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,768,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,1024,0.01152000017464161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,768,0.010975999757647514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,512,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,512,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,512,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,256,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,256,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,256,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,128,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,128,0.010463999584317207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,128,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,64,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,64,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,64,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,12288,32,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,12288,32,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,12288,32,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,12288,0.03155200183391571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,12288,0.05644800141453743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,16384,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,16384,0.07145600020885468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,16384,0.0607680007815361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,12288,0.04630399867892265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,10240,0.04726399853825569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,10240,0.02812799997627735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,10240,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,8192,0.03830400109291077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,8192,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,65536,0.13568000495433807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,8192,0.03296000137925148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,7168,0.02147199958562851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,7168,0.036031998693943024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,7168,0.030112000182271004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,6144,0.031039999797940254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,65536,0.2677119970321655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,6144,0.020320000126957893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,6144,0.02703999914228916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,5120,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,5120,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,4096,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,5120,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,3584,0.0208320003002882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,4096,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,3584,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,3584,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,65536,0.22460800409317017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,3072,0.019711999222636223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,3072,0.013088000006973743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,2560,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,2560,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,3072,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,2560,0.015936000272631645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,2048,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,2048,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,2048,0.01408000010997057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,4096,0.022784000262618065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,1536,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,1536,0.012319999746978283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,1024,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,1536,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,1024,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,1024,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,768,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,768,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,512,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,768,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,512,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,256,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,256,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,128,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,256,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,128,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,64,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,128,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,64,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,64,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,10240,32,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,10240,32,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,10240,32,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,12288,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,12288,0.04092799872159958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,16384,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,16384,0.05116799846291542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,16384,0.04886399954557419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,10240,0.035071998834609985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,12288,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,10240,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,10240,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,8192,0.028896000236272812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,8192,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,7168,0.026655999943614006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,8192,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,65536,0.09721600264310837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,7168,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,6144,0.023135999217629433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,7168,0.024992000311613083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,65536,0.18582400679588318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,6144,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,6144,0.022911999374628067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,5120,0.02051199972629547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,5120,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,4096,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,5120,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,4096,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,4096,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,3584,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,3584,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,3584,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,3072,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,3072,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,3072,0.01539199985563755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,2560,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,65536,0.1801919937133789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,2560,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,2560,0.013535999692976475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,2048,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,2048,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,2048,0.01190400030463934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,1536,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,1536,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,1024,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,1024,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,1536,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,1024,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,768,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,768,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,768,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,512,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,512,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,256,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,256,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,128,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,64,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,64,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,64,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,8192,32,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,8192,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,8192,32,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,12288,0.022655999287962914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,12288,0.03705599904060364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,16384,0.0480320006608963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,16384,0.027103999629616737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,16384,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,12288,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,10240,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,10240,0.01990400068461895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,10240,0.030527999624609947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,8192,0.02675200067460537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,8192,0.016704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,8192,0.026528000831604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,7168,0.02412799932062626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,65536,0.08723200112581253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,7168,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,6144,0.021344000473618507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,65536,0.16704000532627106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,7168,0.023264000192284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,6144,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,5120,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,5120,0.013919999822974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,6144,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,5120,0.018464000895619392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,4096,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,4096,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,3584,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,4096,0.016383999958634377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,3584,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,3584,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,3072,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,3072,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,2560,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,3072,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,2560,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,2560,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,2048,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,2048,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,2048,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,1536,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,1536,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,1536,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,1024,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,1024,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,768,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,768,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,512,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,768,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,65536,0.16278399527072906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,512,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,256,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,256,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,128,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,128,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,64,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,64,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,64,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,7168,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,7168,32,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,7168,32,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,12288,0.03299200162291527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,12288,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,16384,0.024288000538945198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,16384,0.04230400174856186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,16384,0.03827200084924698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,12288,0.030719999223947525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,10240,0.02848000079393387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,10240,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,8192,0.02380800060927868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,10240,0.02627200074493885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,8192,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,7168,0.02208000048995018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,8192,0.02236800082027912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,7168,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,65536,0.07692799717187881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,6144,0.0197759997099638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,7168,0.020735999569296837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,6144,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,65536,0.14313599467277527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,5120,0.017216000705957413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,5120,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,6144,0.018400000408291817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,5120,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,4096,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,4096,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,4096,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,65536,0.13769599795341492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,3584,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,3584,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,3584,0.013567999936640263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,3072,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,3072,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,3072,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,2560,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,2560,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,2560,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,2048,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,1536,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,2048,0.010432000271975994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,1536,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,1536,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,1024,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,1024,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,1024,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,768,0.008704000152647495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,768,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,512,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,512,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,768,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,512,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,256,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,256,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,128,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,6144,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,32,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,64,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,6144,2048,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,6144,32,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,12288,0.01833599992096424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,12288,0.02921600081026554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,16384,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,16384,0.03686400130391121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,16384,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,10240,0.025280000641942024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,12288,0.026496000587940216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,10240,0.016448000445961952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,10240,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,8192,0.014527999795973301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,8192,0.022048000246286392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,7168,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,8192,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,7168,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,65536,0.06755200028419495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,7168,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,6144,0.01740800030529499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,6144,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,65536,0.12303999811410904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,6144,0.0163199994713068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,5120,0.016063999384641647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,5120,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,4096,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,4096,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,5120,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,4096,0.012992000207304955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,3584,0.013024000450968742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,3584,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,65536,0.1159679964184761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,3584,0.012608000077307224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,3072,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,3072,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,2560,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,3072,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,2560,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,2560,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,2048,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,2048,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,1536,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,2048,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,1024,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,1536,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,1024,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,1536,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,1024,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,768,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,768,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,512,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,512,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,256,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,256,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,128,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,64,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,5120,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,5120,32,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,5120,32,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,12288,0.02502400055527687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,16384,0.03222399950027466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,16384,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,16384,0.02828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,12288,0.0226879995316267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,10240,0.02319999970495701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,10240,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,8192,0.018912000581622124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,12288,0.016607999801635742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,10240,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,8192,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,8192,0.017311999574303627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,65536,0.05859199911355972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,65536,0.10675200074911118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,7168,0.018112000077962875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,7168,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,6144,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,7168,0.016224000602960587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,6144,0.012575999833643436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,5120,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,5120,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,6144,0.014368000440299511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,5120,0.013791999779641628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,4096,0.012512000277638435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,4096,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,65536,0.09836799651384354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,3584,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,3584,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,4096,0.012095999903976917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,3584,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,3072,0.011648000217974186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,3072,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,3072,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,2560,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,2560,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,2048,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,2560,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,2048,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,2048,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,1536,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,1536,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,1024,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,1024,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,1536,0.008608000352978706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,1024,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,768,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,768,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,512,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,512,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,256,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,256,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,512,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,128,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,128,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,64,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,64,0.007104000076651573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,4096,32,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,4096,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,4096,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,12288,0.024639999493956566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,12288,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,16384,0.01849599927663803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,16384,0.029120000079274178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,16384,0.026367999613285065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,12288,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,10240,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,10240,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,10240,0.018880000337958336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,8192,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,8192,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,7168,0.01692800037562847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,8192,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,7168,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,65536,0.054816000163555145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,65536,0.09785600006580353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,7168,0.015552000142633915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,6144,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,6144,0.011839999817311764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,5120,0.01414399966597557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,6144,0.014911999925971031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,5120,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,5120,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,4096,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,4096,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,65536,0.08883199840784073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,3584,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,3584,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,3584,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,4096,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,3072,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,3072,0.010367999784648418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,2560,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,2560,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,2048,0.008960000239312649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,2048,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,2560,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,1536,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,2048,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,1536,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,1536,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,1024,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,768,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,1024,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,768,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,512,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,768,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,512,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,256,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,128,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,64,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,128,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,64,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3584,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,64,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3584,32,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,32,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,12288,0.022112000733613968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,16384,0.027583999559283257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,16384,0.01945599913597107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3584,512,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,12288,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,16384,0.02252800017595291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,12288,0.018432000651955605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,10240,0.020096000283956528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,10240,0.014879999682307243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,8192,0.01696000061929226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,10240,0.016256000846624374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,8192,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,7168,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,8192,0.013856000266969204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,65536,0.051231998950242996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,65536,0.09008000046014786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,7168,0.012415999546647072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,7168,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,6144,0.014047999866306782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,6144,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,5120,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,5120,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,6144,0.012160000391304493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,5120,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,4096,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,4096,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,65536,0.07264000177383423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,3584,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,3584,0.009216000325977802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,4096,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,3072,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,3072,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,3584,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,3072,0.008895999751985073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,2560,0.010688000358641148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,2560,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,2048,0.008736000396311283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,2560,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,2048,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,2048,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,1536,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,1536,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,1536,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,1024,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,1024,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,768,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,1024,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,768,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,512,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,768,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,512,0.008383999578654766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,512,0.006912000011652708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,256,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,128,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,128,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,64,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,3072,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,64,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,3072,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,3072,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,12288,0.020447999238967896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,12288,0.014592000283300877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,16384,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,16384,0.02595200017094612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,16384,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,12288,0.016127999871969223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,10240,0.01894400082528591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,10240,0.01398400031030178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,10240,0.014783999882638454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,8192,0.016416000202298164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,8192,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,7168,0.015359999611973763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,8192,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,65536,0.047680001705884933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,65536,0.078015998005867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,7168,0.011455999687314034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,7168,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,6144,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,5120,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,5120,0.011487999930977821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,6144,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,5120,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,65536,0.06159999966621399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,4096,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,4096,0.011103999800980091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,3584,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,4096,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,3584,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,3584,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,3072,0.010495999827980995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,3072,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,3072,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,2560,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,2560,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,2048,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,2560,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,6144,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,2048,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,1536,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,1536,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,1536,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,1024,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,1024,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,1024,0.006719999946653843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,768,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,768,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,512,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,768,0.006816000211983919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,512,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,256,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,512,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,256,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,256,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,128,0.006624000146985054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,64,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2560,32,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,64,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2560,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2560,32,0.006560000125318766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,12288,0.019840000197291374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,12288,0.01360000018030405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,16384,0.024447999894618988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,16384,0.016992000862956047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,16384,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,10240,0.01852799952030182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,12288,0.014240000396966934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,10240,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,8192,0.01500799972563982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,10240,0.01283199992030859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,8192,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,8192,0.01158399973064661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,7168,0.01583999954164028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,65536,0.06796800345182419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,7168,0.011168000288307667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,65536,0.04451199993491173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,7168,0.011071999557316303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,6144,0.012864000163972378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,6144,0.010912000201642513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,5120,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,6144,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,5120,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,5120,0.009375999681651592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,4096,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,4096,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,4096,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,3584,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,65536,0.05135999992489815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,3584,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,3584,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,3072,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,3072,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,3072,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,2560,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,2560,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,2560,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,2048,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,2048,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,1536,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,1536,0.00854399986565113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,1536,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,1024,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,1024,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,768,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,768,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,768,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,512,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,512,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,512,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,256,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,256,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,256,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,128,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,64,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,2048,32,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,2048,32,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,12288,0.01929599978029728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,16384,0.023840000852942467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,16384,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,16384,0.0144640002399683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,12288,0.013439999893307686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,2048,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,12288,0.012000000104308128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,10240,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,10240,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,8192,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,10240,0.011392000131309032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,8192,0.011008000001311302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,65536,0.04560000076889992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,65536,0.05772799998521805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,8192,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,7168,0.014495999552309513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,7168,0.010847999714314938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,6144,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,6144,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,5120,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,6144,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,7168,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,5120,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,5120,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,65536,0.0414079986512661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,4096,0.008448000065982342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,4096,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,4096,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,3584,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,3584,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,3584,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,3072,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,3072,0.008352000266313553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,3072,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,2560,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,2560,0.009184000082314014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,2560,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,2048,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,2048,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,2048,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,1536,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,1536,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,1024,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,1536,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,1024,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,768,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,1024,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,512,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,768,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,768,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,512,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,256,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,512,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,256,0.006463999859988689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,128,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,64,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,128,0.006527999881654978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,64,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,64,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1536,32,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1536,32,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1536,32,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,12288,0.017823999747633934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,12288,0.013376000337302685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,16384,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,16384,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,16384,0.011935999616980553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,12288,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,10240,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,10240,0.012640000320971012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,10240,0.009727999567985535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,8192,0.014271999709308147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,8192,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,7168,0.013952000066637993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,65536,0.04076800122857094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,65536,0.0504320003092289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,8192,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,7168,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,6144,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,7168,0.008671999908983707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,6144,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,5120,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,5120,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,4096,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,6144,0.008415999822318554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,4096,0.008576000109314919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,5120,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,4096,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,65536,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,3584,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,3584,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,3072,0.009600000455975533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,3584,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,3072,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,2560,0.010143999941647053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,3072,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,2560,0.007007999811321497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,2048,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,2048,0.0081599997356534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,2048,0.006943999789655209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,1536,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,1536,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,1024,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,1024,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,1536,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,768,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,1024,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,768,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,512,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,512,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,768,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,512,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,256,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,256,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,256,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,128,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,128,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,2560,0.008191999979317188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,64,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,128,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,64,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,1024,32,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,64,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,1024,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,1024,32,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,12288,0.017920000478625298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,12288,0.013120000250637531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,16384,0.021727999672293663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,16384,0.015519999898970127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,16384,0.010879999957978725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,12288,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,10240,0.015807999297976494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,10240,0.012959999963641167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,10240,0.008991999551653862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,8192,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,8192,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,8192,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,7168,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,65536,0.03961599990725517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,65536,0.04982399940490723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,7168,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,6144,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,7168,0.008511999621987343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,6144,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,5120,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,5120,0.01017600018531084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,6144,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,5120,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,4096,0.009920000098645687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,4096,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,65536,0.027168000116944313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,4096,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,3584,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,3584,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,3584,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,3072,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,3072,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,3072,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,2560,0.008320000022649765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,2560,0.009855999611318111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,2048,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,2560,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,2048,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,1536,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,2048,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,1536,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,1024,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,1536,0.006432000081986189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,1024,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,1024,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,768,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,768,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,512,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,768,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,512,0.008063999935984612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,256,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,512,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,256,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,128,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,128,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,128,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,64,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,64,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,64,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,768,32,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,768,32,0.007199999876320362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,768,32,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,12288,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,16384,0.02131200022995472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,16384,0.014816000126302242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,16384,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,12288,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,10240,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,10240,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,8192,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,10240,0.00863999966531992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,8192,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,12288,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,65536,0.039903998374938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,65536,0.0708480030298233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,8192,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,7168,0.01375999953597784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,7168,0.010591999627649784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,7168,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,6144,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,6144,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,5120,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,6144,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,5120,0.010528000071644783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,4096,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,65536,0.022975999861955643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,5120,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,3584,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,4096,0.008128000423312187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,4096,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,3584,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,3584,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,3072,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,3072,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,3072,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,2560,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,2560,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,2560,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,2048,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,2048,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,1536,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,2048,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,1536,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,1024,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,1536,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,1024,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,1024,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,768,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,768,0.00825599953532219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,768,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,512,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,256,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,128,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,128,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,128,0.006111999973654747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,64,0.007935999892652035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,64,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,512,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,512,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,512,32,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,12288,0.01244799979031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,12288,0.01744000054895878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,16384,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,16384,0.015647999942302704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,16384,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,12288,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,10240,0.015904000028967857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,10240,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,10240,0.008287999778985977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,8192,0.013407999649643898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,8192,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,8192,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,65536,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,65536,0.06947200000286102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,7168,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,7168,0.01033599954098463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,6144,0.01142400037497282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,7168,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,6144,0.009983999654650688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,5120,0.011615999974310398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,5120,0.010208000428974628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,65536,0.021376000717282295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,4096,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,6144,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,5120,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,4096,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,4096,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,3584,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,3584,0.009664000011980534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,3072,0.009631999768316746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,3584,0.006976000033318996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,3072,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,3072,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,2560,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,2560,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,2048,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,2048,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,2560,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,1536,0.007968000136315823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,1536,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,2048,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,1536,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,1024,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,1024,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,1024,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,768,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,512,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,768,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,512,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,512,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,256,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,256,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,128,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,256,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,128,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,128,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,64,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,64,0.006304000038653612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,64,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,256,32,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,32,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,256,32,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,256,768,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,12288,0.017343999817967415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,16384,0.015039999969303608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,16384,0.020864000543951988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,12288,0.01228800043463707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,16384,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,12288,0.008224000222980976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,10240,0.01587199978530407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,10240,0.012256000190973282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,10240,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,8192,0.014303999952971935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,8192,0.010400000028312206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,65536,0.0681919977068901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,65536,0.0387520007789135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,7168,0.013472000136971474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,8192,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,7168,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,7168,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,6144,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,6144,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,5120,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,5120,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,6144,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,4096,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,65536,0.020479999482631683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,5120,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,4096,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,3584,0.009344000369310379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,4096,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,3584,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,3584,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,3072,0.008031999692320824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,3072,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,3072,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,2560,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,2560,0.0063680000603199005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,2048,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,2560,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,2048,0.007648000027984381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,2048,0.006335999816656113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,1536,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,1536,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,1536,0.006016000173985958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,1024,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,1024,0.0071680000983178616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,512,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,768,0.007679999805986881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,768,0.0080960001796484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,1024,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,768,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,512,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,512,0.005824000108987093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,256,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,128,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,256,0.0060800001956522465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,128,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,128,0.006591999903321266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,64,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,128,32,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,64,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,128,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,128,32,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,12288,0.018015999346971512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,12288,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,16384,0.02127999998629093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,16384,0.014560000039637089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,16384,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,12288,0.00848000030964613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,10240,0.015456000342965126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,10240,0.011872000060975552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,10240,0.007872000336647034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,8192,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,8192,0.01065600011497736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,65536,0.06851200014352798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,65536,0.039135999977588654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,8192,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,7168,0.013824000023305416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,7168,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,6144,0.011359999887645245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,6144,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,6144,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,65536,0.019967999309301376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,5120,0.011327999643981457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,5120,0.010623999871313572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,5120,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,4096,0.009952000342309475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,4096,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,4096,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,3584,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,3584,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,3584,0.0066559999249875546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,3072,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,3072,0.007552000228315592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,3072,0.006688000168651342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,2560,0.009535999968647957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,2560,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,2560,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,2048,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,7168,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,2048,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,2048,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,1536,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,1536,0.007903999648988247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,1536,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,1024,0.00723200011998415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,1024,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,1024,0.005632000043988228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,768,0.007615999784320593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,768,0.0077760000713169575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,768,0.006271999794989824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,512,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,512,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,256,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,256,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,512,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,256,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,128,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,128,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,128,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,64,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,64,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,64,32,0.007135999854654074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,64,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,64,32,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,64,32,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,12288,0.017023999243974686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,12288,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,16384,0.021183999255299568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,16384,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,16384,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,12288,0.009151999838650227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,10240,0.015296000055968761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,10240,0.01206399966031313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,10240,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,8192,0.013344000093638897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,8192,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,8192,0.007424000184983015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,65536,0.06876800209283829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,7168,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,65536,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,7168,0.010080000385642052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,7168,0.007296000141650438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,6144,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,5120,0.011136000044643879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,6144,0.010111999697983265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,5120,0.0098879998549819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,6144,0.007071999832987785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,4096,0.009696000255644321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,5120,0.00687999976798892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,65536,0.019936000928282738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,4096,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,3584,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,3584,0.009472000412642956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,4096,0.006207999773323536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,3584,0.006752000190317631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,3072,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,3072,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,2560,0.009440000168979168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,3072,0.006240000016987324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,2560,0.00800000037997961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,2048,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,2560,0.006399999838322401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,2048,0.007840000092983246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,1536,0.0072639998979866505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,2048,0.006047999951988459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,1536,0.007712000049650669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,1536,0.005663999821990728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,768,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,1024,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,1024,0.005919999908655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,768,0.00774399982765317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,512,0.007584000006318092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,768,0.00598399993032217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,512,0.007519999984651804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,256,0.007327999919652939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,512,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,256,0.006144000217318535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,128,0.007455999962985516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,128,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,64,0.007040000054985285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,64,0.006175999995321035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,32,0.007391999941319227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int4_wo,1,32,32,0.006783999968320131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/smooth_quant_gemm_L96/PLUGIN_V2_SmoothQuantGemm_0,sq,1,32,32,0.00595200015231967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,trt_flow_/weight_only_quant_matmul_L257/PLUGIN_V2_WeightOnlyQuantMatmul_0,int8_wo,1,32,64,0.0074880002066493034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,16384,13.131778971354166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,16384,25.631955973307292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,12288,9.903676350911457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,16384,15.27066446940104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,12288,17.715118408203125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,10240,8.100647481282552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,8192,7.290538533528645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,8192,11.813931274414063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,7168,6.702128092447917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,10240,16.108949788411458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,10240,10.10524190266927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,7168,10.455224609375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,6144,5.072395833333333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,12288,12.273568725585937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,6144,9.01956787109375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,5120,4.398992919921875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,7168,6.911543273925782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,8192,7.702692159016927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,4096,3.3415489196777344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,5120,7.536289978027344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,4096,6.193265279134115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,3584,2.9581899007161456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,3584,5.409956359863282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,3072,2.675079345703125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,3072,4.579598999023437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,6144,5.877045186360677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,5120,4.942018127441406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,2560,2.2015904744466144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,2048,1.7572415669759114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,2560,4.418340047200521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,2048,3.132821400960286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,1536,1.4572799682617188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,4096,4.141542307535807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,1536,2.491955312093099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,1024,0.9576458613077798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,1024,1.6116746266682942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,768,0.8210805257161459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,768,1.208013916015625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,3584,3.7563051859537757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,512,0.9311274846394857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,512,0.605728022257487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,2560,2.727551015218099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,256,0.5557258605957032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,256,0.4606058756510417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,128,0.44193172454833984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,3072,3.3042762756347654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,128,0.4266165415445964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,64,0.4107701301574707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,2048,2.331652323404948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,64,0.41428693135579425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,65536,32,0.3991498629252116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,65536,32,0.4034869194030762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,1536,1.881005859375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,256,0.7583434422810872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,512,0.9146048227945963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,128,0.723251215616862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,768,1.1427285512288412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,65536,1024,1.3236043294270834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,16384,3.7292757670084633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,12288,2.850352986653646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,12288,4.609852600097656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,16384,5.977158610026041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,10240,2.4051434834798178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,10240,3.811167907714844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,8192,1.8618762969970704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,8192,3.082677459716797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,65536,15.326492309570312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,7168,1.5633247375488282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,7168,3.3660001118977867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,16384,4.0681302388509115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,12288,3.074046834309896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,10240,2.650318908691406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,6144,1.4112927754720053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,6144,2.5002731323242187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,5120,1.1776949564615884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,5120,1.855453872680664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,4096,0.9621407826741537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,4096,1.5536970774332681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,65536,24.76508992513021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,3584,1.4079424540201821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,3584,0.8603776295979818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,8192,2.1018389383951823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,3072,0.7515157063802083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,3072,1.2577482859293618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,7168,1.852448018391927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,2560,0.6300383885701497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,2560,1.0281002680460611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,5120,1.326849110921224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,65536,20.511299641927085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,2048,0.8414645512898764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,2048,0.493502934773763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,6144,1.5702901204427084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,1536,0.558786137898763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,1536,0.37864532470703127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,1024,0.40186131795247393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,1024,0.27537705103556315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,768,0.3068277359008789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,768,0.22350719769795738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,4096,0.9950080235799155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,512,0.22571306228637694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,3584,0.8953482945760092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,512,0.19366399447123211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,2560,0.6388725280761719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,256,0.14181013107299806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,256,0.13622612953186036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,128,0.11532800197601319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,128,0.12429653008778889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,3072,0.7626229604085286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,64,0.10738666852315266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,2048,0.5435904184977214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,64,0.12256426811218261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,16384,32,0.10362880229949951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,16384,32,0.1193610668182373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,1536,0.4460309346516927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,768,0.29979092280069985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,1024,0.33387626012166344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,256,0.20438613891601562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,512,0.24410667419433593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,16384,128,0.18743252754211426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,16384,2.794586690266927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,16384,4.5272364298502605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,12288,2.110251744588216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,12288,3.3453313191731775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,10240,1.8040149688720704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,10240,2.7815691630045576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,8192,1.4002464294433594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,8192,2.15435307820638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,65536,10.952173868815105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,7168,1.3504895528157552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,7168,1.9465503692626953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,6144,1.6638848622639972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,6144,1.048258145650228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,12288,2.3842783610026044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,16384,3.1217002868652344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,10240,1.9680885314941405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,5120,0.8858709335327148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,5120,1.46124267578125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,65536,19.2894775390625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,4096,0.6967967987060547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,4096,1.1462741851806642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,3584,0.6287829081217449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,3584,1.044114112854004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,3072,0.8708341598510743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,3072,0.5541013081868489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,8192,1.5324724833170573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,7168,1.25589968363444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,2560,0.7526528040568035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,2560,0.4900191942850749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,65536,14.383910115559896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,2048,0.5646858851114909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,2048,0.47623891830444337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,1536,0.5152266820271809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,1536,0.332207997639974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,5120,0.91202023824056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,1024,0.3244426727294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,1024,0.2256373405456543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,6144,1.1721300760904947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,768,0.2637653350830078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,768,0.18429333368937176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,4096,0.74694398244222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,512,0.18641494115193685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,512,0.1474847952524821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,3584,0.6568864186604817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,256,0.10308266480763753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,256,0.10957547028859456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,2560,0.4960938771565755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,128,0.08784746328989665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,128,0.10017173290252686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,3072,0.5811146418253581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,64,0.07997439702351888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,64,0.09589866797129312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,12288,32,0.07874879837036133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,12288,32,0.09322346846262614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,2048,0.41384747823079426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,1536,0.334282652537028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,256,0.15474559466044108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,512,0.18988693555196126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,768,0.2262773354848226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,128,0.14454612731933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,12288,1024,0.25662399927775065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,16384,2.5091509501139324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,12288,1.8288682301839192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,16384,3.8760022481282554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,12288,2.9623456319173176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,10240,1.5503082275390625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,10240,2.2640970865885417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,65536,9.425018310546875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,8192,1.2244842529296875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,8192,1.9679412841796875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,7168,1.7264383951822917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,7168,1.0606026967366537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,6144,1.4586368560791017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,6144,0.9287466684977213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,65536,16.33333536783854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,16384,2.4382985432942705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,12288,1.935368474324544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,10240,1.59847780863444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,5120,1.171957270304362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,5120,0.7965610504150391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,4096,1.0154186884562173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,4096,0.6454858779907227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,3584,0.8082197189331055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,3584,0.551958401997884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,7168,1.1060128529866537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,8192,1.245366414388021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,65536,11.155600992838542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,3072,0.7107178370157878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,3072,0.5329824129740397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,2560,0.5720223744710287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,2560,0.4361077308654785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,2048,0.3416597366333008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,2048,0.4864981333414714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,1536,0.4103701273600261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,1536,0.3009119987487793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,5120,0.7803413391113281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,1024,0.2531936009724935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,6144,0.9278133392333985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,1024,0.19797439575195314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,768,0.19079467455546062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,768,0.15841600100199382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,4096,0.6222047805786133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,512,0.1402623971303304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,512,0.12498453458150227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,3584,0.5516053517659505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,256,0.10016000270843506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,256,0.09484480222066244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,2560,0.4138954798380534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,128,0.07522559960683187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,128,0.08455999692281088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,2048,0.3473642667134603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,64,0.06917226314544678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,64,0.08212160269419352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,3072,0.4866197268168132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,10240,32,0.0684063990910848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,10240,32,0.08132479985555013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,1536,0.28147414525349934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,768,0.19228906631469728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,512,0.15852799415588378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,1024,0.21649813652038574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,128,0.11980053583780925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,10240,256,0.12973439693450928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,16384,2.020679473876953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,16384,3.183058166503906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,12288,1.5055861155192056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,12288,2.3757728576660155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,10240,1.2783167521158854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,10240,1.904751968383789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,8192,1.574946085611979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,8192,1.0287839889526367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,65536,8.297356669108073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,7168,1.4233685811360677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,7168,0.8829408009847006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,6144,1.2026475270589194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,6144,0.7962197621663412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,65536,12.10853983561198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,5120,1.0344725290934245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,10240,1.324118423461914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,5120,0.8075957616170248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,12288,1.5760575612386067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,16384,2.181806945800781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,4096,0.5137781461079916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,4096,0.9493386586507162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,3584,0.6485013326009115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,3584,0.4721045176188151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,3072,0.5775018692016601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,3072,0.4113045374552409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,7168,0.9176885604858398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,65536,9.11219482421875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,8192,1.0617631912231444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,2560,0.49150934219360354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,2560,0.3204202651977539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,2048,0.4006208101908366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,2048,0.26111146608988445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,1536,0.2826570510864258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,1536,0.22289813359578453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,5120,0.6349706649780273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,6144,0.796554692586263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,1024,0.2121568044026693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,1024,0.1814773400624593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,768,0.1563488006591797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,768,0.1372181256612142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,4096,0.5117717425028483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,512,0.10964586734771728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,512,0.09827626546223958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,3584,0.45442879994710283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,256,0.0678656021753947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,256,0.07844693660736084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,2560,0.3392640113830566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,128,0.05981226762135824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,128,0.07105279763539632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,3072,0.3994271914164225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,64,0.055080533027648926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,64,0.07074453035990397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,8192,32,0.054413866996765134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,8192,32,0.06954452991485596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,2048,0.29139839808146156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,1536,0.23497600555419923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,768,0.1587615966796875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,512,0.13166186809539795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,256,0.10732479890187581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,128,0.09701226552327474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,8192,1024,0.17897920608520507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,16384,1.7747263590494793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,16384,2.7717056274414062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,12288,1.898818079630534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,12288,1.326483154296875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,65536,7.2248682657877605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,10240,1.6140650431315104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,10240,1.1261216481526692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,8192,1.3332586924235026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,8192,0.9202079772949219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,7168,0.8107327779134115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,65536,10.37615254720052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,7168,1.1775839487711588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,6144,0.689084815979004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,6144,0.9625727971394857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,5120,0.8519210815429688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,5120,0.6759370803833008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,16384,1.9159712473551433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,12288,1.367741902669271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,10240,1.1352043151855469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,4096,0.6481109619140625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,4096,0.4748160044352214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,3584,0.41565866470336915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,3584,0.5761322657267253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,3072,0.4928928057352702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,65536,7.226161193847656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,3072,0.3737408002217611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,2560,0.3945162773132324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,2560,0.31145814259847004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,7168,0.7973983764648438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,8192,0.9426730473836263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,2048,0.3791808128356934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,2048,0.29091199239095056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,1536,0.2814367930094401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,1536,0.199454927444458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,5120,0.564313570658366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,1024,0.17565333048502604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,6144,0.674456532796224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,1024,0.15320639610290526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,768,0.1327722628911336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,768,0.11951786677042645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,4096,0.45757439931233723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,512,0.10384426911671955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,512,0.09872426986694335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,2560,0.3072736104329427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,256,0.061402666568756106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,3584,0.4219061215718587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,256,0.07225493590037027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,128,0.05372053384780884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,128,0.06401919921239217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,3072,0.360479990641276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,64,0.04923306703567505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,2048,0.26205652554829917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,64,0.06354986826578776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,7168,32,0.048110934098561604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,7168,32,0.06328746477762857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,1536,0.21453439394632973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,768,0.14453760782877606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,128,0.08974186579386392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,256,0.09796906312306722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,512,0.11859839757283527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,16384,2.3325770060221354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,7168,1024,0.16662079493204754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,16384,1.6082901000976562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,12288,1.771306610107422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,12288,1.2332800547281901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,10240,1.4606250762939452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,10240,1.0256415685017903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,65536,6.406990051269531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,8192,0.8400010426839193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,8192,1.1713141123453776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,7168,0.7376725514729817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,7168,1.0354965209960938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,65536,9.0848388671875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,6144,0.8501120249430338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,6144,0.6224522908528646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,5120,0.695576540629069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,5120,0.5438197453816731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,12288,1.1916810353597005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,10240,0.97237548828125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,4096,0.583454958597819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,4096,0.4517322540283203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,16384,1.6194868723551434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,3584,0.4765130678812663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,3584,0.4045610745747884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,3072,0.4194794654846191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,3072,0.33778241475423176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,65536,6.862353006998698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,2560,0.3842944145202637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,7168,0.6753952026367187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,2560,0.2805514653523763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,8192,0.7789109547932942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,2048,0.2737728118896484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,2048,0.2387242635091146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,1536,0.21712106068929038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,1536,0.18885119756062824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,5120,0.49460159937540693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,1024,0.15181013743082683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,6144,0.5767616271972656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,1024,0.1334869384765625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,4096,0.4045983950297038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,768,0.1129205306371053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,768,0.1070698658625285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,512,0.084661332766215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,512,0.08249173164367676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,3584,0.3558229446411133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,256,0.05408426523208618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,2560,0.26833171844482423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,256,0.06559360027313232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,128,0.047891199588775635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,128,0.05785173177719116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,3072,0.3149802525838216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,64,0.04364053408304851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,2048,0.22657492955525718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,64,0.05760000149408976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,6144,32,0.04235413471857707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,6144,32,0.05751466751098633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,1536,0.18378559748331708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,1024,0.14202133814493817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,128,0.07582080364227295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,256,0.08241066932678223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,512,0.10192853609720867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,16384,1.9613610585530599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,16384,1.3910420735677085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,6144,768,0.12262933254241944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,12288,1.416058603922526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,65536,5.738690185546875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,12288,1.0807050069173179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,10240,1.162613296508789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,10240,0.944582430521647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,8192,0.9359807968139648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,8192,0.7653226852416992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,65536,7.46064961751302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,7168,0.8683968226114909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,7168,0.6328181584676107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,6144,0.7378431955973308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,6144,0.5490090688069661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,5120,0.5601376215616862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,5120,0.47002239227294923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,10240,0.8368426640828451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,12288,0.998851203918457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,4096,0.4666890780131022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,16384,1.3744191487630208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,4096,0.411135991414388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,3584,0.4329600016276042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,65536,5.568217468261719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,3584,0.33365545272827146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,3072,0.35623998641967775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,3072,0.2921567916870117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,2560,0.2815199851989746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,7168,0.5737418492635091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,2560,0.2469653288523356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,2048,0.22855253219604493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,2048,0.20379093488057456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,8192,0.669047482808431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,1536,0.18932266235351564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,5120,0.42084480921427414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,1536,0.17006079355875653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,1024,0.1326037327448527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,1024,0.12105706532796223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,6144,0.5040554682413737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,768,0.10715306599934896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,768,0.10333333015441895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,4096,0.3501397450764974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,512,0.07418560187021891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,2560,0.23101545969645182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,512,0.07768959999084472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,256,0.04952319860458374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,256,0.05883520046869913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,3584,0.3092543919881185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,128,0.03997439940770467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,128,0.051345066229502356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,2048,0.19673600196838378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,3072,0.27205012639363607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,64,0.03631573518117269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,5120,32,0.03612373272577922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,64,0.05151786804199219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,5120,32,0.05113919973373413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,1536,0.15986773173014324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,768,0.10837120215098064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,1024,0.12511999607086183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,16384,1.493011220296224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,16384,1.2117195129394531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,256,0.0720906654993693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,128,0.06644800106684366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,12288,1.0563274383544923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,5120,512,0.08847786585489908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,12288,0.9186357498168946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,65536,4.853482564290365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,10240,1.010707219441732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,10240,0.7550954818725586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,65536,5.775128682454428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,8192,0.6806901295979817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,8192,0.6478282928466796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,7168,0.5821834564208984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,7168,0.6224341074625651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,6144,0.5388341267903646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,6144,0.48868799209594727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,5120,0.43068908055623367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,5120,0.39464213053385416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,16384,1.0741013844807943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,10240,0.7132874806722005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,4096,0.36659199396769204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,12288,0.8384159723917642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,4096,0.34328959782918295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,3584,0.3180341402689616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,65536,4.827128601074219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,3584,0.29349546432495116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,3072,0.25439680417378746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,3072,0.28486080169677735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,2560,0.224783992767334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,7168,0.4834752082824707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,2560,0.22331199645996094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,2048,0.17894506454467773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,2048,0.1801344076792399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,8192,0.5432191848754883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,1536,0.14011093775431316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,6144,0.40898453394571943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,1536,0.14143679936726888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,1024,0.09652372996012369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,1024,0.1014847993850708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,5120,0.352076784769694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,768,0.07737812995910645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,768,0.08315093517303467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,4096,0.2882378578186035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,2560,0.19370880126953124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,512,0.05744426647822062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,512,0.06616746584574382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,256,0.037529599666595456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,256,0.05108586549758911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,3584,0.25557866096496584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,128,0.032806400458017984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,128,0.04535893201828003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,2048,0.1622378667195638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,64,0.02956266601880391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,64,0.04518186648686727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,4096,32,0.029919999837875366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,3072,0.22509867350260415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,4096,32,0.045075198014577225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,1536,0.1311520020167033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,1024,0.10207040309906006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,256,0.05839360157648722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,16384,1.2678517659505208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,768,0.08741546471913655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,128,0.05339733362197876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,65536,4.464180501302083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,4096,512,0.07220906416575114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,12288,0.9269194920857748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,16384,1.1255274454752604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,12288,0.8373653411865234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,65536,5.307166035970052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,10240,0.7668725331624349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,8192,0.6155359903971355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,10240,0.7380842844645182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,8192,0.6004586537679036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,7168,0.5490624109903972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,7168,0.5213664054870606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,6144,0.5064490636189778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,6144,0.44173758824666337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,5120,0.37700586318969725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,5120,0.37460158665974935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,10240,0.6136682510375977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,4096,0.32825066248575846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,12288,0.7330549240112305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,65536,4.163547770182292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,4096,0.313753604888916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,16384,1.0020597457885743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,3584,0.27480961481730143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,3584,0.3269482612609863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,7168,0.4391743977864583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,3072,0.23393813769022623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,3072,0.2431872049967448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,2560,0.1974634647369385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,2560,0.20367786089579262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,2048,0.15786239306132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,5120,0.3180053393046061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,2048,0.16611305872599286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,1536,0.1214133342107137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,8192,0.49810345967610675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,1536,0.12822186946868896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,1024,0.08548800150553384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,1024,0.09477012952168783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,6144,0.37924585342407224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,768,0.06886826356252035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,768,0.07842559814453125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,2560,0.17728959719340007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,512,0.051278932889302575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,512,0.062009600798288975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,4096,0.2609834671020508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,256,0.03415573438008626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,256,0.04822506507237752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,3584,0.23470187187194824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,128,0.029363199075063066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,2048,0.14751359621683757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,128,0.04318933486938477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,64,0.02722559968630473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,64,0.041832534472147624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3584,32,0.026796799898147584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,3072,0.20773332913716636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3584,32,0.04203840096791585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,1536,0.12056106726328533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,768,0.08160746892293294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,1024,0.09336000283559164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,16384,1.0779861450195312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,16384,1.0475381215413413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,65536,4.1173759460449215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,65536,4.383867899576822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,256,0.05383786757787069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,128,0.04876586596171061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3584,512,0.06627839803695679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,12288,0.7804554621378581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,12288,0.7838026682535808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,10240,0.6335562388102214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,10240,0.6406581242879231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,8192,0.5686464309692383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,8192,0.5281930605570475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,7168,0.45743465423583984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,7168,0.4897493362426758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,6144,0.4163989384969075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,6144,0.40694611867268876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,5120,0.33119465510050455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,5120,0.35750080744425455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,10240,0.5421237309773763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,65536,3.7507807413736978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,4096,0.28059307734171546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,4096,0.2908639907836914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,12288,0.6454826354980469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,3584,0.23344853719075523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,3584,0.25264320373535154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,3072,0.20067092577616372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,16384,0.886619758605957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,3072,0.21541226704915367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,7168,0.38648532231648763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,2560,0.1684351921081543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,2560,0.18418347040812175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,2048,0.13754879633585612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,2048,0.15176000595092773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,5120,0.28352425893147787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,1536,0.10798079967498779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,1536,0.11788372993469239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,8192,0.43614934285481766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,6144,0.3343061447143555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,1024,0.07521066665649415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,1024,0.08728960355122885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,768,0.06005119880040487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,768,0.07249279816945395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,2560,0.15520000457763672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,4096,0.23322240511576334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,512,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,512,0.05727359851201376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,256,0.03044160008430481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,256,0.04330133199691773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,3584,0.2083946704864502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,128,0.025814400116602583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,128,0.03853973150253296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,2048,0.13181653022766113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,64,0.02376000086466471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,64,0.039622398217519124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,3072,32,0.024462932348251344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,3072,0.18181653022766114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,3072,32,0.03901333411534627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,1536,0.10848960081736247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,768,0.0715669314066569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,16384,0.8885237375895182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,1024,0.08303786913553873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,16384,1.0049653371175131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,12288,0.6344928105672201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,65536,3.7762026468912757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,65536,3.722600555419922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,256,0.046749866008758544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,12288,0.7222496032714844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,10240,0.5423871994018554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,128,0.04254293441772461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,8192,0.45773760477701825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,10240,0.5919839859008789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,8192,0.4755658785502116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,3072,512,0.05773866573969523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,7168,0.3775007883707682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,7168,0.4197823842366536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,6144,0.3260565439860026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,6144,0.3633653322855631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,5120,0.27167787551879885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,5120,0.3283754666646322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,10240,0.4746815999348958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,4096,0.2188810666402181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,65536,3.411064656575521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,12288,0.5653162638346354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,16384,0.7704896291097005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,4096,0.2534848054250082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,3584,0.1926634629567464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,3072,0.16695040067036945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,3584,0.23398079872131347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,3072,0.19994880358378092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,2560,0.14117333094278972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,7168,0.3352917353312174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,2560,0.17083093325297039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,2048,0.1149941364924113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,2048,0.14075627326965331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,5120,0.24848532676696777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,8192,0.3870794614156087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,1536,0.08952639897664388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,6144,0.29502506256103517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,1536,0.11032213370005291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,1024,0.06369280020395915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,1024,0.08122453689575196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,768,0.05100586811701456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,768,0.06665493249893188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,2560,0.13639893531799316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,512,0.03917333285013835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,512,0.05370986859003703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,4096,0.20643413861592613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,256,0.028050132592519122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,256,0.040540798505147295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,3584,0.1816757361094157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,128,0.021963733434677123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,128,0.035259731610616046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,2048,0.11588373184204101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,64,0.019576533635457357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,64,0.03463360071182251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2560,32,0.02001706759134928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,3072,0.1598624070485433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2560,32,0.034178133805592856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,1536,0.09370773633321126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,1024,0.07294080257415772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,65536,3.097142283121745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,16384,0.6743861516316731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,65536,3.332281494140625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,16384,0.846671994527181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,12288,0.5034591992696126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,768,0.061950933933258054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,256,0.04079253276189168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,12288,0.63460693359375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,10240,0.4226613362630208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,128,0.03643093506495158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,10240,0.5402890523274739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2560,512,0.05034346580505371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,8192,0.3405920028686523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,8192,0.43256107966105145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,7168,0.29571520487467445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,7168,0.40787200927734374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,6144,0.25534292856852214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,6144,0.33715521494547523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,5120,0.21628160476684571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,5120,0.2895893414815267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,12288,0.4817301432291667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,4096,0.17384427388509113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,10240,0.4086912155151367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,65536,2.827382405598958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,4096,0.23090666135152182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,3584,0.17729706764221193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,3584,0.20478612581888833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,7168,0.286955738067627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,3072,0.13290026982625325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,3072,0.17756266593933107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,5120,0.21273706754048666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,2560,0.11235093275705973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,16384,0.6512416203816731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,2560,0.15332479476928712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,2048,0.09843626817067465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,2048,0.1259722630182902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,1536,0.0710752010345459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,1536,0.0986303965250651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,8192,0.3345226605733236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,6144,0.2495317300160726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,1024,0.05201493501663208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,1024,0.0728437344233195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,768,0.04124266703923543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,768,0.06123626629511515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,2560,0.11655786832173665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,512,0.032001066207885745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,512,0.048733866214752196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,4096,0.17395200729370117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,256,0.022098133961359658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,256,0.03674986759821574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,2048,0.09740906556447347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,128,0.01700053413709005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,3584,0.1562869389851888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,128,0.03125440080960591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,64,0.014774399995803832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,64,0.030473599831263225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,2048,32,0.015685333808263143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,2048,32,0.02990399996439616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,1536,0.07906453609466553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,3072,0.1355562686920166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,768,0.05263146559397379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,16384,0.4981045405069987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,16384,0.7572693506876628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,1024,0.06158506472905477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,65536,2.3353663126627606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,12288,0.38656320571899416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,12288,0.5870527903238932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,65536,2.9737012227376303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,10240,0.3196960131327311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,10240,0.48076801300048827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,8192,0.2565791924794515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,256,0.03376213312149048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,8192,0.38963305155436195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,128,0.030193066596984862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,7168,0.22589866320292154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,2048,512,0.04233173529307048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,7168,0.34552745819091796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,6144,0.1953173319498698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,6144,0.2991893450419108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,5120,0.1630239963531494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,5120,0.25499092737833656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,10240,0.3423754692077637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,12288,0.41259307861328126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,4096,0.13259306748708088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,65536,2.2930442810058596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,4096,0.2085653305053711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,3584,0.12845226923624675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,3584,0.18573973973592123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,16384,0.5368565241495769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,3072,0.10317013263702393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,3072,0.16024853388468424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,2560,0.08506666819254557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,2560,0.13712639808654786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,7168,0.2440021355946859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,2048,0.06991573174794516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,2048,0.11433706283569336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,8192,0.2742645263671875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,1536,0.05432213147481283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,1536,0.0898751974105835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,6144,0.21321493784586587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,1024,0.040088534355163574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,1024,0.06605973243713378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,5120,0.1809567928314209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,768,0.03282879988352458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,768,0.05473386843999227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,2560,0.09921813011169434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,512,0.025124265750249224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,512,0.04390506744384766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,4096,0.14770240783691407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,256,0.01798506577809652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,256,0.03110506733258565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,3584,0.13240426381429035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,128,0.013886933525403341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,128,0.028750934203465778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,3072,0.11704213619232177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,64,0.012268799543380737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,64,0.02829013268152873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1536,32,0.012615467111269632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,2048,0.08190186818440756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1536,32,0.027638399600982667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,65536,1.424461873372396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,1536,0.06667839686075847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,16384,0.3323018709818522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,65536,2.6614112854003906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,16384,0.6710389455159504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,768,0.04380160172780355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,12288,0.2514069398244222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,12288,0.5053109486897787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,256,0.027412267525990804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,1024,0.05161066850026449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,10240,0.21240320205688476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,128,0.023962666591008507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,8192,0.17079893747965497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,10240,0.42626132965087893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1536,512,0.035334400335947674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,7168,0.15041173299153646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,8192,0.3416138648986816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,6144,0.12845653692881268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,7168,0.30203625361124675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,6144,0.2603797276814779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,5120,0.1082645336786906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,5120,0.2210378646850586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,10240,0.27606293360392253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,4096,0.08852907021840414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,4096,0.18202880223592122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,12288,0.32949654261271155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,3584,0.07693013350168863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,65536,1.7891061147054035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,3584,0.16232320467631023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,3072,0.0668170690536499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,3072,0.1411893367767334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,7168,0.19917227427164713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,2560,0.0573365330696106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,16384,0.44025599161783857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,5120,0.1449674606323242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,2560,0.12170560359954834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,2048,0.04677120049794515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,2048,0.10070079962412518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,1536,0.03648533423741658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,1536,0.07759040196736654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,8192,0.225492270787557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,6144,0.17135252952575683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,1024,0.026078933477401735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,1024,0.055111467838287354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,768,0.0212501327196757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,768,0.0440287987391154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,2560,0.07914453347524007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,512,0.016411733627319337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,512,0.033514666557312014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,4096,0.11983893712361653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,256,0.011386666695276897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,256,0.02576533357302348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,2048,0.06581013202667237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,3584,0.10662933190663655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,128,0.008784000078837078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,128,0.02179946700731913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,1536,0.053420801957448326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,64,0.007652266820271809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,64,0.02132800022761027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,3072,0.0921610673268636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,1024,32,0.008039466540018718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,1024,32,0.021002666155497233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,768,0.03482026656468709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,65536,1.0449877421061198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,16384,0.28735148111979164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,1024,0.040775465965271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,16384,0.6462357203165691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,12288,0.19931093851725262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,65536,2.4623733520507813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,12288,0.47859627405802413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,10240,0.16668693224589032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,10240,0.39994560877482094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,8192,0.1342400074005127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,256,0.020489599307378134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,128,0.017875200510025023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,8192,0.3231722513834635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,7168,0.11644372940063477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,1024,512,0.02677226662635803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,7168,0.28532158533732094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,6144,0.10299839973449706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,6144,0.247979736328125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,5120,0.08557439645131429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,5120,0.2105151971181234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,65536,1.5354208628336588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,10240,0.24331092834472656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,4096,0.06897599697113037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,4096,0.17177920341491698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,12288,0.291708787282308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,3584,0.06041386524836222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,16384,0.38264214197794594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,3584,0.1520682652791341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,3072,0.0529642661412557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,3072,0.13292053540547688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,2560,0.04443306525548299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,2560,0.11402773062388102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,7168,0.1751893361409505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,5120,0.1286911964416504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,2048,0.03659093379974365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,2048,0.09451626936594645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,1536,0.028553599119186403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,1536,0.07208746274312337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,6144,0.15363413492838543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,8192,0.19933973948160807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,1024,0.02073813279469808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,1024,0.04938240051269531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,768,0.016501333316167197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,768,0.03869760036468506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,2560,0.06867946783701578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,512,0.012762666742006937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,512,0.02977493405342102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,4096,0.10533546606699626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,256,0.009152000149091084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,256,0.023193599780400594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,3584,0.09376853307088216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,128,0.007246933380762736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,128,0.02036693294843038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,2048,0.05798293352127075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,64,0.00631039987007777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,3072,0.0808906634648641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,64,0.01999893387158712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,768,32,0.006460799773534138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,768,32,0.01989226738611857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,1536,0.047031466166178384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,65536,0.7073247909545899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,16384,0.19680213928222656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,768,0.02869759996732076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,16384,0.6196800231933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,12288,0.13178026676177979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,65536,2.2746292114257813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,12288,0.44356692632039385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,10240,0.11041920185089112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,1024,0.035478401184082034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,10240,0.3704010645548502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,8192,0.08917333285013834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,8192,0.29985599517822265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,256,0.017498666048049928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,128,0.014908799529075622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,7168,0.0780512015024821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,7168,0.26419092814127604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,768,512,0.022667733828226726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,6144,0.06730773448944091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,6144,0.2292405287424723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,5120,0.05654400189717611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,5120,0.1941482702891032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,10240,0.21498133341471354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,4096,0.04723840157190959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,4096,0.15861546198527018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,65536,1.3087029774983725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,3584,0.04174293279647827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,12288,0.2564917405446371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,3584,0.14317439397176107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,3072,0.036142933368682864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,16384,0.3352821350097656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,3072,0.12583893140157063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,2560,0.03102506597836812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,7168,0.15378986994425456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,2560,0.10576106707255047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,5120,0.11098133722941081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,2048,0.025225599606831867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,2048,0.08752426306406656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,8192,0.17571199735005696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,1536,0.01960853338241577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,1536,0.06481599807739258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,6144,0.13272853692372638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,1024,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,1024,0.043783466021219894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,768,0.01207360029220581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,768,0.03435946702957153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,4096,0.09163306554158529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,512,0.009746133287747701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,512,0.026658133665720625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,2560,0.059811198711395265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,256,0.007320533196131389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,256,0.021357866128285725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,3584,0.08094720045725504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,128,0.00617386649052302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,128,0.019466666380564372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,3072,0.07094613711039224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,64,0.005479466418425242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,64,0.018914133310317993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,512,32,0.005696000158786773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,2048,0.05013546546300253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,512,32,0.018217599391937254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,65536,0.4216927846272786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,1536,0.039817599455515544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,16384,0.10489813486735027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,16384,0.5469568252563477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,65536,2.146290079752604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,1024,0.02927359938621521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,12288,0.08298666477203369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,768,0.024554665883382162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,10240,0.06884693304697673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,12288,0.4153621355692546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,10240,0.35065174102783203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,8192,0.05506560007731119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,8192,0.2830783843994141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,128,0.012121599912643433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,256,0.014124799768129984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,7168,0.049541334311167404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,7168,0.25031147003173826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,6144,0.04370773235956828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,512,512,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,6144,0.21664533615112305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,5120,0.03720320065816243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,5120,0.18316373825073243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,10240,0.18722666104634603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,65536,1.1137332916259766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,4096,0.03039039969444275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,4096,0.14935466448465984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,3584,0.02791573405265808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,12288,0.22552000681559242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,3584,0.13237760066986085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,3072,0.024972800413767496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,3072,0.11507840156555176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,16384,0.293780263264974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,2560,0.020974934101104736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,2560,0.09822399616241455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,7168,0.13433067003885907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,2048,0.013938132921854654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,5120,0.09628266493479411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,2048,0.07903786500295004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,1536,0.010989866654078166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,1536,0.05772053400675455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,8192,0.15201813379923504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,1024,0.008646399776140849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,1024,0.03709760109583537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,6144,0.1152341365814209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,768,0.007463466624418895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,768,0.029487999280293782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,2560,0.05062079826990763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,4096,0.07818026542663574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,512,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,512,0.024692267179489136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,256,0.005064533154169718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,256,0.019962666432062785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,3584,0.06930346488952636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,128,0.004498133560021719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,128,0.017730132738749186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,2048,0.041840000947316484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,64,0.00412266676624616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,64,0.017078399658203125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,256,32,0.004375466704368591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,3072,0.060371200243631996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,256,32,0.017038933436075845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,1536,0.03337173461914063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,65536,0.4163125356038411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,16384,0.10862613519032796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,768,0.019059199094772338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,16384,0.5446826934814453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,12288,0.07453973293304443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,12288,0.4112736066182454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,65536,2.1432703653971354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,10240,0.062498132387797035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,1024,0.02324906587600708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,10240,0.3457066535949707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,256,0.010930132865905762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,8192,0.05100906689961752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,8192,0.2777557373046875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,128,0.008984532952308655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,256,512,0.015186132987340293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,7168,0.04490773280461629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,7168,0.2462613264719645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,6144,0.04018773237864177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,6144,0.21313172976175943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,5120,0.03359359900156657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,5120,0.18013013203938802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,16384,0.28383359909057615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,4096,0.027874133984247845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,65536,1.073849614461263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,4096,0.1459658622741699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,3584,0.02478613257408142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,3584,0.1293066660563151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,12288,0.21484479904174805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,3072,0.02204266587893168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,3072,0.11243093013763428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,10240,0.18052585919698078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,2560,0.018572799364725747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,2560,0.09605226516723633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,8192,0.14540799458821613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,2048,0.009404800335566203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,7168,0.12835946877797444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,2048,0.07643199761708577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,1536,0.007574399809042613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,1536,0.05432960192362467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,6144,0.11027306715647381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,1024,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,1024,0.03571199973424276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,5120,0.09212586879730225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,768,0.0054506664474805195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,768,0.028100266059239702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,4096,0.07433706919352213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,512,0.004716800153255462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,512,0.023383466402689616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,3072,0.056252801418304445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,256,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,256,0.018997333447138467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,3584,0.06567786534627279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,128,0.003588266670703888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,128,0.017246933778127034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,2048,0.03871999979019165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,64,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,2560,0.04815253416697184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,1024,0.021002666155497233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,128,32,0.0037269333998362223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,64,0.016613333423932394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,128,32,0.01625599960486094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,16384,0.09405013720194498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,65536,0.3854111989339193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,12288,0.07197973728179932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,16384,0.5399328231811523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,10240,0.06048640012741089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,12288,0.40879252751668294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,8192,0.04915093183517456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,1536,0.029704533020655316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,10240,0.34354238510131835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,7168,0.043494399388631186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,6144,0.038427734375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,8192,0.27738132476806643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,7168,0.2449120044708252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,5120,0.03249706625938416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,768,0.017092265685399375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,4096,0.02693333427111308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,6144,0.21075520515441895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,5120,0.17863465944925944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,512,0.013553067048390707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,3584,0.023924267292022704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,3072,0.021115734179814657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,4096,0.14601386388142903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,3584,0.1287775993347168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,65536,2.1361790974934896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,2560,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,3072,0.11141760349273681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,2048,0.00946453313032786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,2560,0.09439679781595865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,256,0.009876267115275065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,2048,0.07547093232472737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,1536,0.007965866724650066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,1536,0.053415465354919436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,1024,0.006217599908510844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,768,0.005650133391221364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,1024,0.03323306639989217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,512,0.00447573314110438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,768,0.027771733204523724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,256,0.0037962667644023894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,512,0.023371734221776328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,128,0.00365226666132609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,256,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,32,0.003420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,128,0.016773333152135216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,64,64,0.0033439998825391137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,64,0.01630506714185079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8192,128,128,0.00790293316046397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,64,32,0.015749333302179973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,16384,0.09318826993306478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,12288,0.07137920061747233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,10240,0.05978986819585165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,8192,0.04877119859059652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,7168,0.042948265870412186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,65536,0.3805216153462728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,12288,0.4102176030476888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,10240,0.3431626637776693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,16384,0.5405322392781575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,6144,0.038173866271972653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,8192,0.27682453791300454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,5120,0.0321343998114268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,7168,0.24403732617696128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,6144,0.21146453221639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,5120,0.17835200627644857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,4096,0.026494934161504106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,3584,0.02335253357887268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,4096,0.14477334022521973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,3072,0.020772266387939452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,2560,0.015132799744606018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,3584,0.12894079685211182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,3072,0.11116693019866944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,2048,0.008237866560618083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,2560,0.09428586959838867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,1536,0.007237333556016285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,2048,0.07516160011291503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,1536,0.05237866640090942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,1024,0.00544106662273407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,768,0.00487360010544459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,1024,0.03213226596514384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,512,0.004286933441956838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,768,0.0274944007396698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,256,0.0035946667194366455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,65536,2.120941925048828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,512,0.023078399896621703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,128,0.0032458665470282235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,256,0.01889280080795288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,128,0.016999467213948568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,64,0.003005866706371307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,64,0.015982932845751443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8192,32,32,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8192,32,32,0.01573973298072815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,12288,4.841855875651041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,10240,4.286936442057291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,16384,6.703632100423176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,10240,7.501177469889323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,12288,8.913855997721354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,12288,5.873160298665365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,8192,3.3354400634765624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,10240,4.927615865071615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,16384,7.628781636555989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,8192,6.002295430501302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,16384,12.14923095703125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,7168,2.90994135538737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,6144,2.637261962890625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,7168,6.072554524739584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,5120,2.063105010986328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,8192,3.999457041422526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,6144,5.3758600870768225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,5120,4.378817240397135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,4096,1.9958805084228515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,7168,3.4576789855957033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,6144,2.920007578531901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,3584,1.5166773478190103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,3584,2.7152544657389326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,4096,3.14266357421875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,3072,2.322499084472656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,2560,1.0659114837646484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,3072,1.312708282470703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,5120,2.568024444580078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,2560,1.9366079966227214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,2048,0.8809813181559244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,4096,2.102693303426107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,2048,1.6758965810139972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,1536,1.1808650970458985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,1536,0.6636800130208333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,1024,0.78864320119222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,1024,0.49500160217285155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,3584,1.8268789927164715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,768,0.4049887975056966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,768,0.6045973459879558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,3072,1.6275296529134113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,512,0.4336202621459961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,512,0.3254773457845052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,256,0.28084052403767906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,256,0.24003307024637857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,128,0.22456960678100585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,2560,1.2727381388346353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,2048,1.0875935872395834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,128,0.22199680010477701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,64,0.20823359489440918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,65536,32,0.20401066144307456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,64,0.21769280433654786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,1536,0.8568639755249023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,65536,32,0.21346559524536132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,1024,0.6314624150594075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,768,0.5746016184488932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,512,0.45836801528930665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,256,0.3837973276774088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,65536,128,0.3674976030985514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,16384,1.8183701833089192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,16384,3.198311360677083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,12288,1.3140021006266276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,12288,1.5439295450846353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,12288,2.402791341145833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,10240,1.1633525848388673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,10240,1.9717194875081379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,8192,0.9091520309448242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,16384,2.114517339070638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,65536,7.147459411621094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,8192,1.5939445495605469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,7168,0.857203229268392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,6144,1.1219797770182292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,7168,1.3979775746663412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,6144,0.7184277216593424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,10240,1.1845675150553385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,6144,0.7449493408203125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,5120,0.9509973526000977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,5120,0.5837727864583333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,4096,0.499725882212321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,4096,0.7561269124348958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,3584,0.4130538622538249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,3584,0.6650741577148438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,8192,0.942964235941569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,7168,0.8311850865681967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,65536,8.398779805501302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,3072,0.6106645584106445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,3072,0.38087679545084635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,65536,12.677274576822917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,3072,0.4362815856933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,2560,0.32537705103556314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,2560,0.5070570627848308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,2048,0.2572885354359945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,2048,0.44555199940999346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,2048,0.31389118830362955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,1536,0.29505386352539065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,1536,0.19493014017740887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,1024,0.2033770720163981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,1024,0.14740907351175944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,5120,0.5947445551554362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,768,0.15119892756144207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,768,0.12090026537577311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,512,0.11263039906819661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,512,0.0928160031636556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,256,0.06770239671071371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,256,0.07709440390268961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,4096,0.5019445419311523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,128,0.06684160232543945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,128,0.0738645315170288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,3584,0.4384138743082683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,64,0.05501439968744913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,64,0.0708085298538208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,16384,32,0.05581760009129842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,16384,32,0.07048106988271077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,2560,0.32655251820882164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,1536,0.2290741284688314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,1024,0.17484906514485676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,768,0.15640212694803873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,512,0.12780266602834064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,256,0.10752533276875813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,16384,128,0.09892053604125976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,16384,1.3813589731852214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,16384,2.398822275797526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,12288,1.0482624053955079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,12288,1.6270645141601563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,10240,0.875714111328125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,10240,1.4024480183919272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,8192,0.7120512008666993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,8192,1.1502197265625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,65536,5.657375081380208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,16384,1.5850271860758463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,7168,0.614471435546875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,7168,1.0131338755289714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,12288,1.08306032816569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,6144,0.5661280314127605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,6144,0.8463370641072592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,10240,0.9441781361897787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,5120,0.6678645451863606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,5120,0.47900692621866864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,4096,0.3687829335530599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,4096,0.5439573287963867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,65536,8.988972981770832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,3584,0.5191242535909016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,3584,0.33225812911987307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,8192,0.7186335881551107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,3072,0.3961418787638346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,7168,0.6372320175170898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,3072,0.29499092102050783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,2560,0.3486975987752279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,2560,0.24156586329142252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,6144,0.534933344523112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,65536,6.759082539876301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,2048,0.27160746256510415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,2048,0.18938560485839845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,1536,0.20902506510416666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,1536,0.1564736048380534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,5120,0.4614944140116374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,1024,0.1428181330362956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,1024,0.11614186763763427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,768,0.11324693361918133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,768,0.09740479787190756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,768,0.11961173216501872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,4096,0.375543467203776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,512,0.0846122662226359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,512,0.07693973382314047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,3584,0.3366741180419922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,256,0.05631253321965536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,256,0.06586560010910034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,3072,0.29080425898234047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,128,0.04710186719894409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,128,0.05773439804712931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,2560,0.25209280649820964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,64,0.04292800029118855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,64,0.05847040017445883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,2048,0.21509547233581544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,12288,32,0.04367253383000692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,12288,32,0.058589867750803624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,1536,0.17476587295532225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,1024,0.13551360766092937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,512,0.09885120391845703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,16384,1.9214079538981121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,256,0.0824938694636027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,12288,128,0.07548800309499105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,16384,1.170740254720052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,16384,1.2543818155924478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,12288,0.9007658640543619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,12288,1.3568330128987631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,10240,1.2135157267252603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,10240,0.7612927754720051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,10240,0.8064469019571939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,8192,0.9108725229899088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,8192,0.6403647740681966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,65536,5.020229085286458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,7168,0.8598047892252604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,7168,0.5306645393371582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,7168,0.5627541224161784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,6144,0.6666869481404623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,6144,0.48163518905639646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,6144,0.4641119956970215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,5120,0.587393061319987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,5120,0.40368852615356443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,4096,0.4524341265360515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,4096,0.34475520451863606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,12288,0.9392223993937174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,3584,0.40613333384195965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,3584,0.28274453481038414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,3072,0.37802346547444665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,65536,7.478796895345051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,3072,0.240557861328125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,2560,0.20639146169026695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,2560,0.2992021242777506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,8192,0.6075157165527344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,2048,0.23351680437723793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,2048,0.19787732760111493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,65536,5.173113505045572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,1536,0.2045205275217692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,1536,0.15383040110270182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,1024,0.1316639979680379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,1024,0.10521173477172852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,5120,0.39126933415730797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,768,0.10504000186920166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,4096,0.3179242769877116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,768,0.09171199798583984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,512,0.07803839842478434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,3584,0.28128852844238283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,512,0.07194773356119791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,256,0.05032533407211304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,256,0.05785173177719116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,3072,0.24554239908854164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,128,0.03976960182189941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,128,0.051336534818013514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,2560,0.21317013104756674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,64,0.036073601245880126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,64,0.05184746583302816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,10240,32,0.03679253260294597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,2048,0.18160533905029297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,10240,32,0.051975464820861815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,1536,0.14870400428771974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,1024,0.11502400239308674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,768,0.10004586378733318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,512,0.08321920235951742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,256,0.06996373335520426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,16384,1.4312479654947916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,16384,0.9796288172403971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,10240,128,0.06381973425547281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,12288,0.7462677637736003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,12288,1.1286516825358073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,10240,0.9153535842895508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,65536,3.996008555094401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,10240,0.6245834350585937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,8192,0.7875167846679687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,8192,0.5076170603434245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,7168,0.4380906740824382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,7168,0.6687456130981445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,65536,5.940263366699218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,6144,0.565555191040039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,16384,1.0354549407958984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,6144,0.39286079406738283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,12288,0.7587957382202148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,5120,0.5140629450480143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,5120,0.3295765240987142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,4096,0.2618186632792155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,4096,0.38511358896891273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,10240,0.6316074371337891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,65536,4.316119384765625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,3584,0.3161888122558594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,8192,0.496013863881429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,3584,0.2509343942006429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,3072,0.267627747853597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,3072,0.2084928035736084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,7168,0.43052161534627276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,2560,0.22227840423583983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,2560,0.1881696065266927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,2048,0.18140053749084473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,6144,0.3697397232055664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,2048,0.14962666829427082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,1536,0.13896212577819825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,1536,0.117193603515625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,5120,0.3201866785685221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,1024,0.09772053559621176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,1024,0.08797439734141031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,4096,0.2602517286936442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,768,0.07896746794382731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,768,0.0753760019938151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,3584,0.23271466890970866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,512,0.05788480043411255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,512,0.0591210683186849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,256,0.03750506639480591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,3072,0.20257813135782876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,256,0.049098666508992514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,128,0.03238079945246379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,128,0.04383999903996785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,2560,0.17724372545878092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,64,0.029306666056315107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,64,0.04506133397420247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,2048,0.14922879536946615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,8192,32,0.030423466364542646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,8192,32,0.045341865221659346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,1536,0.12200213273366292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,1024,0.09419840176900228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,768,0.083404803276062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,512,0.06941546599070231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,16384,1.3018527984619142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,256,0.0571232000986735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,16384,0.8794741312662759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,8192,128,0.05287893215815226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,12288,0.6610442479451497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,12288,0.9805130640665689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,10240,0.7748512268066406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,65536,3.5827616373697913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,10240,0.566431999206543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,8192,0.6805311838785808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,8192,0.4665088017781575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,7168,0.398091729482015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,7168,0.583021863301595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,65536,5.15278065999349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,6144,0.500271987915039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,6144,0.37638400395711263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,16384,0.9202218373616537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,5120,0.3763061205546061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,12288,0.667738660176595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,5120,0.3207626660664876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,4096,0.3430432001749674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,4096,0.26354986826578775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,10240,0.5506464004516601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,3584,0.29220587412516275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,65536,3.702556864420573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,3584,0.2238826592763265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,8192,0.44231786727905276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,3072,0.2565375963846842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,3072,0.19530773162841797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,2560,0.20477333068847656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,2560,0.16250346501668295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,2048,0.15920960108439128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,2048,0.13145386377970378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,7168,0.38477865854899085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,1536,0.12149226665496826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,1536,0.10854612986246745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,1536,0.11357440153757732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,6144,0.3387989362080892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,1024,0.08597226937611899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,1024,0.08081813653310141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,768,0.0697322686513265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,768,0.06846400101979574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,5120,0.2815349260965983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,512,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,512,0.055417601267496744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,4096,0.23175253868103027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,512,0.06317013502120972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,256,0.03376426696777344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,256,0.04557439883550008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,128,0.02956053415934245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,128,0.042108798027038576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,3584,0.20971199671427407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,64,0.026573866605758667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,3072,0.18413333892822265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,64,0.04207466840744019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,7168,32,0.027138133843739826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,7168,32,0.04214719931284587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,2560,0.15707093874613445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,2048,0.13456427256266276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,1024,0.08650346597035727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,16384,1.055519994099935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,768,0.07618666489919027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,256,0.05208213329315185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,16384,0.8213312149047851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,7168,128,0.04814186493555705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,16384,0.8077856063842773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,65536,3.2172437032063805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,12288,0.7899274826049805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,12288,0.6006122589111328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,10240,0.6676511764526367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,10240,0.5106848080952961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,8192,0.517410119374593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,8192,0.43007465998331706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,65536,4.49150390625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,7168,0.4964000066121419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,7168,0.36630398432413735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,6144,0.3142431894938151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,6144,0.43395519256591797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,5120,0.3320490519205729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,5120,0.2810720125834147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,4096,0.2649738629659017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,4096,0.21867839495340982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,12288,0.5627487818400065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,65536,3.405273691813151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,3584,0.2509450594584147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,3584,0.21984000205993653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,10240,0.47606827418009445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,3072,0.20384747187296548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,3072,0.166758394241333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,8192,0.3862197240193685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,2560,0.16806507110595703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,2560,0.14955199559529622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,2048,0.13808107376098633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,7168,0.3360992113749186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,2048,0.12069013118743896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,6144,0.28723945617675783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,1536,0.10662293434143066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,1536,0.09536533355712891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,1024,0.0740821361541748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,5120,0.2489290714263916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,1024,0.0731754700342814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,768,0.05922453403472901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,768,0.0620138684908549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,4096,0.20353493690490723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,512,0.04500480095545451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,512,0.050928000609079996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,3584,0.17993920644124348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,256,0.030228267113367718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,3072,0.15735893249511718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,256,0.04174933433532715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,128,0.02569920023282369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,2560,0.13799039522806805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,128,0.039061331748962404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,64,0.024029866854349772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,64,0.038805333773295085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,6144,32,0.025228800376256307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,2048,0.1165013313293457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,6144,32,0.03919893503189087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,1536,0.0960863987604777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,1024,0.07464106877644858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,768,0.06397333145141601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,512,0.05383253494898478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,16384,0.9210197448730468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,16384,0.6909877141316731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,16384,0.6527658462524414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,256,0.044675199190775554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,12288,0.5269493420918783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,12288,0.7060597101847331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,6144,128,0.041060264905293783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,65536,2.767073059082031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,10240,0.4471765200297038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,10240,0.5344138463338216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,10240,0.4491402626037598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,8192,0.4298240025838216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,7168,0.37864532470703127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,8192,0.3732394536336263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,7168,0.3447680155436198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,65536,3.8018315633138022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,6144,0.33437013626098633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,6144,0.27902186711629234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,5120,0.29573227564493815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,5120,0.21193493207295738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,5120,0.26438719431559243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,4096,0.2289962609608968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,4096,0.19193600018819174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,3584,0.20219626426696777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,3584,0.16902079582214355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,3072,0.16753600438435873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,3072,0.1473749319712321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,12288,0.49210348129272463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,65536,2.904583485921224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,2560,0.1491637388865153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,2560,0.14298346837361653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,8192,0.3277632077534994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,7168,0.28973865509033203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,2048,0.12765440146128337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,2048,0.10651520093282063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,1536,0.09412053426106771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,1536,0.08719893296559653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,6144,0.2552234649658203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,1024,0.06401813427607218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,1024,0.06681919892628987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,4096,0.17493972778320313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,768,0.052374398708343504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,768,0.057384534676869714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,3584,0.1573098659515381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,512,0.03951253493626912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,512,0.047072001298268634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,3072,0.13814080556233724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,256,0.02805546720822652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,256,0.03723413149515788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,2560,0.11872106393178303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,128,0.022473599513371786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,128,0.03389866749445598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,128,0.036210131645202634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,2048,0.10107626914978027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,64,0.019604265689849854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,64,0.03422506650288899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,5120,32,0.020096000035603842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,1536,0.0846560001373291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,5120,32,0.033641600608825685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,1024,0.06540160179138184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,768,0.05714026689529419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,16384,0.685100809733073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,512,0.04710719982783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,16384,0.6090112050374349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,12288,0.5228245417277019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,12288,0.45830825169881184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,5120,256,0.03909333149592082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,10240,0.43445758819580077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,10240,0.38684587478637694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,65536,2.4135231018066405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,8192,0.3467967987060547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,8192,0.3146709442138672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,65536,3.0244107564290363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,7168,0.3297109285990397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,7168,0.2775210698445638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,6144,0.2710624059041341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,6144,0.24436799685160318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,5120,0.218613338470459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,5120,0.2135178724924723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,16384,0.5308874766031901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,4096,0.18511999448140462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,12288,0.4108298619588216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,4096,0.1808501402537028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,3584,0.15517013867696125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,65536,2.3870165506998697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,10240,0.3365386644999186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,3584,0.1546357313791911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,3072,0.13296106656392415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,3072,0.1319541295369466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,8192,0.27651945749918616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,2560,0.11328319708506267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,2560,0.1114357312520345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,7168,0.24015572865804038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,2048,0.0930069367090861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,2048,0.0930741310119629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,1536,0.07147093613942465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,6144,0.20739839871724447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,1536,0.07557760079701742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,1024,0.05099733273188273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,1024,0.05845333337783813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,5120,0.1776458740234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,768,0.04156586726506551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,768,0.05001173416773478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,4096,0.1481280008951823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,512,0.03213866750399272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,512,0.04238506555557251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,3584,0.1294005314509074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,256,0.021811199188232423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,256,0.033869866530100504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,3072,0.11430826981862385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,2560,0.09907413323720296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,128,0.016851200660069784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,128,0.030152533451716108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,64,0.014759467045466105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,64,0.03030186692873637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,2048,0.08430293401082357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,4096,32,0.01572053333123525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,4096,32,0.029663999875386555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,1536,0.06894720395406087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,1024,0.05366400082906088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,768,0.04689066807428996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,16384,0.6455210367838542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,16384,0.5818058649698894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,12288,0.4767562548319499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,512,0.038711468378702804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,65536,2.305522155761719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,256,0.032232532898585006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,65536,2.8606592814127607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,4096,128,0.029653332630793255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,12288,0.48360001246134443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,10240,0.40213654836018875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,10240,0.36967360178629555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,8192,0.3160778681437174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,8192,0.3209856033325195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,7168,0.2797738711039225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,7168,0.2788629213968913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,6144,0.24170346260070802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,6144,0.2372117360432943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,16384,0.4856287956237793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,5120,0.2178271929423014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,65536,2.11191889444987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,5120,0.21019306182861328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,4096,0.16576320330301922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,12288,0.36900266011555993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,4096,0.16755199432373047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,3584,0.14563199679056804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,10240,0.3049642562866211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,8192,0.25053226153055824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,3584,0.1447327931722005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,3072,0.1265397310256958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,3072,0.1256725311279297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,7168,0.21979200045267738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,2560,0.10686079661051433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,2560,0.10693439642588298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,2048,0.08688746293385824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,6144,0.1908917268117269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,2048,0.09114987055460612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,2048,0.07717759609222412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,1536,0.06767786343892415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,1536,0.07481280167897543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,1024,0.04900799989700318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,1536,0.06412373383839926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,1024,0.05771733522415161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,768,0.040115201473236085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,768,0.049158398310343424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,512,0.02991360028584798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,5120,0.16147519747416178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,512,0.04012480179468791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,256,0.0203658660252889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,256,0.03236693342526754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,256,0.02945493261019389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,128,0.0150026669104894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,128,0.028151466449101763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,4096,0.134333864847819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,64,0.014356266458829245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,3584,0.12005546887715657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,64,0.029462399085362752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3584,32,0.015480533242225647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3584,32,0.029038933912913005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,3072,0.10559360186258952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,2560,0.09060373306274414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,16384,0.5104000091552734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,1024,0.049582934379577635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,768,0.04359680016835531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,16384,0.5395445505777995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,12288,0.3817237218221029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,12288,0.3992255846659342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,65536,2.060416030883789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,65536,2.3194422403971355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,512,0.03557759920756022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,10240,0.32276268005371095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,8192,0.25582826932271324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,10240,0.3363840103149414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,8192,0.2782208124796549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,7168,0.231330140431722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3584,128,0.02656426628430684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,7168,0.2408074696858724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,6144,0.19354453086853027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,6144,0.2081439971923828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,5120,0.16491947174072266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,5120,0.1766090710957845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,16384,0.4286527951558431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,4096,0.13382827440897624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,65536,1.8041088104248046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,4096,0.14311893781026203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,4096,0.13547840118408203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,3584,0.1183733304341634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,12288,0.32395413716634114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,3584,0.13982826868693035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,3072,0.10193066596984864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,10240,0.2693834622701009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,3072,0.11691199938456218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,2560,0.08583253224690755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,8192,0.22114453315734864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,2560,0.09860266844431559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,2048,0.07006186644236247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,2048,0.08388266563415528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,1536,0.05584746599197388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,7168,0.1986826737721761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,1536,0.06803413232167563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,1536,0.057461333274841306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,1024,0.04076053301493327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,6144,0.16980692545572917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,1024,0.0516981323560079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,768,0.03287253379821777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,768,0.04394773244857788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,512,0.025443200270334882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,512,0.03655253251393636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,5120,0.14356160163879395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,256,0.017972266674041747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,256,0.02932693362236023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,3584,0.10666346549987793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,128,0.013969066739082336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,3072,0.09344639778137206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,128,0.027157332499821978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,64,0.012113066514333089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,2560,0.07989546457926432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,64,0.026820266246795656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,3072,32,0.012685867150624594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,2048,0.06858987013498942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,3072,32,0.02572159965833028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,1024,0.044565331935882566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,768,0.03852159976959228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,512,0.031678932905197146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,16384,0.41573333740234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,256,0.02534613410631816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,16384,0.47357546488444013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,16384,0.3940693219502767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,12288,0.31695572535196936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,65536,1.8415456136067707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,65536,1.8289632161458331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,12288,0.36087252298990885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,10240,0.2663914680480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,10240,0.31290772755940754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,8192,0.21695574124654135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,8192,0.24451200167338052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,7168,0.19100160598754884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,7168,0.21851627031962076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,3072,128,0.023476266860961915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,6144,0.16234240531921387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,6144,0.1879989306131999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,5120,0.13484907150268555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,5120,0.15952320098876954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,4096,0.11005547046661376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,4096,0.1317845344543457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,12288,0.2841269175211588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,65536,1.6071573893229167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,3584,0.09589227040608725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,10240,0.23819413185119628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,3584,0.13215253353118897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,3072,0.083570130666097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,8192,0.19539839426676434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,3072,0.11601920127868652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,2560,0.0706933339436849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,2560,0.09031786918640136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,2560,0.07069013118743897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,7168,0.1747093359629313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,2048,0.05944639841715495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,2048,0.07373653252919515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,1536,0.04622933467229207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,6144,0.15087253252665203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,1536,0.05999999841054281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,1536,0.05190933148066203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,1024,0.0326474666595459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,1024,0.045102934042612716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,768,0.026279467344284057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,768,0.03365226586659749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,768,0.037333333492279054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,512,0.020103466510772706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,512,0.03099199930826823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,5120,0.12620373566945392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,256,0.013831466436386108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,256,0.02552853425343831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,128,0.01046720047791799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,128,0.02270080049832662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,128,0.020244266589482626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,64,0.009171199798583985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,64,0.022929066419601442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2560,32,0.009303466478983561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2560,32,0.022346667448679605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,4096,0.10394133726755779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,3584,0.09312533537546794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,3072,0.08126400311787924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,65536,1.4139413197835287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,16384,0.33164587020874026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,2048,0.06063040097554525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,1024,0.03891306718190511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,12288,0.25596586863199866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,65536,1.6715189615885417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,16384,0.4830037434895833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,512,0.027128533522288008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,10240,0.21152000427246093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,12288,0.32757441202799475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,10240,0.2801557223002116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2560,256,0.022009599208831786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,8192,0.1704576015472412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,8192,0.22351679801940919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,7168,0.14776746431986493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,7168,0.1972373326619466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,6144,0.12767039934794108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,6144,0.12822506427764893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,6144,0.17092480659484863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,5120,0.10747093359629314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,5120,0.1450709342956543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,4096,0.08784746328989665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,4096,0.11927039623260498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,16384,0.32384106318155925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,3584,0.07712106704711914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,3584,0.10836479663848878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,12288,0.24603840510050454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,65536,1.298254903157552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,3072,0.06813653310139975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,10240,0.20603413581848146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,2560,0.056901331742604574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,3072,0.09681066672007242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,8192,0.16870293617248536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,2560,0.07977706591288249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,2048,0.046562135219573975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,7168,0.14790293375651042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,2048,0.06591786543528239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,1536,0.03607253233591716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,1536,0.05189973513285319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,1536,0.04207466840744019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,5120,0.10830293496449787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,1024,0.025740800301233928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,1024,0.03893866539001465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,768,0.021449599663416544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,768,0.033242666721343996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,4096,0.09010453224182129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,768,0.02837120095888774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,512,0.016229333480199178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,512,0.027718400955200194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,256,0.011356799801190694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,256,0.023612799247105916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,3584,0.07932373682657877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,128,0.00881599982579549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,3072,0.06991679668426513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,128,0.021206400791803994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,2560,0.06104640165964762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,64,0.00765119989713033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,2048,32,0.00806933343410492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,64,0.021631999810536703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,2048,32,0.0212991992632548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,2048,0.05179839928944906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,1024,0.03299733400344849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,16384,0.25863146781921387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,65536,1.1362762451171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,512,0.02286720077196757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,16384,0.3849109331766764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,12288,0.23180480003356935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,256,0.018472532431284584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,12288,0.2938986778259277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,65536,1.4937578837076821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,10240,0.17694613138834636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,10240,0.24589333534240723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,10240,0.17469760576883953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,8192,0.13118293285369872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,8192,0.21641599337259926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,7168,0.1163413365681966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,7168,0.17884267171223958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,6144,0.10045759677886963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,6144,0.15605440139770507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,5120,0.08361813227335611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,5120,0.13316480318705243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,2048,128,0.01705066760381063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,4096,0.06934933662414551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,4096,0.10959146817525227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,65536,1.114694341023763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,16384,0.2716789245605469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,3584,0.0643338680267334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,12288,0.20730667114257811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,3584,0.0964682658513387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,3072,0.05233386754989624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,3072,0.08373760382334391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,2560,0.04420160055160523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,8192,0.1411765257517497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,2560,0.07131626605987548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,2560,0.05136533180872599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,7168,0.12628053029378256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,2048,0.03604480028152466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,2048,0.058285868167877196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,1536,0.02790293296178182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,6144,0.10974187056223553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,1536,0.045399467150370285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,1024,0.020577067136764528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,1024,0.034329601128896076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,768,0.016622933745384216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,768,0.02966933250427246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,5120,0.09272426764170329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,512,0.012882133324941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,512,0.025973333915074663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,4096,0.0758506695429484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,256,0.009117866555849712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,256,0.02112213373184204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,3584,0.06809493700663248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,128,0.007405866682529449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,3072,0.058950400352478026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,128,0.019859200716018675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,64,0.006484266618887584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,64,0.019781333208084107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1536,32,0.0067210664351781205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,2048,0.04405226707458496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1536,32,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,1536,0.03584106763203938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,1024,0.027508266766866046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,16384,0.16818453470865885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,768,0.023448532819747923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,65536,0.6565823872884115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,12288,0.1265760024388631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,16384,0.34098453521728517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,512,0.01954879959424337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,65536,0.8496490478515625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,10240,0.10575146675109863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,12288,0.2652575969696045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,65536,1.3182411193847656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,8192,0.08550506432851156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,10240,0.2209376017252604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,7168,0.07587626775105795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,8192,0.17901333173116046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,256,0.01577279965082804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,7168,0.15933119455973307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,6144,0.06599786678949991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,6144,0.13918719291687012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1536,128,0.014388266205787658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,5120,0.055188266436258945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,5120,0.11837653319040935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,16384,0.21984853744506835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,4096,0.04482239882151286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,12288,0.16951467196146647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,4096,0.0963040033976237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,3584,0.03978559970855713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,10240,0.14071040153503417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,3584,0.0857856035232544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,8192,0.11654293537139893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,3072,0.034467200438181564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,3072,0.07535573641459146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,2560,0.02986239989598592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,2560,0.06200746695200602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,2048,0.02477653423945109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,7168,0.10269760290781657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,2048,0.05049173434575399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,1536,0.01971413294474284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,1536,0.0384117325146993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,6144,0.08936959902445475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,1024,0.014542933305104574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,1024,0.029458133379618327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,5120,0.07373119990030924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,768,0.011946666240692138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,768,0.02595733404159546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,4096,0.06112000147501627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,512,0.009657599528630574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,3584,0.054535468419392906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,512,0.022732800245285033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,256,0.007222400108973186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,256,0.02009920080502828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,3072,0.0479477326075236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,128,0.006113066772619883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,2560,0.04211626847585042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,128,0.01835413376490275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,64,0.005419733126958212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,128,0.011620266238848369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,64,0.01805866758028666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,1024,32,0.005598933498064677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,1024,32,0.017854932943979898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,2048,0.03552853266398112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,1536,0.028564266363779706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,65536,0.5154378573099773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,16384,0.1296885331471761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,1024,0.021719467639923096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,12288,0.09996906916300455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,16384,0.33162879943847656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,12288,0.24502612749735514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,65536,1.234133275349935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,768,0.01881493330001831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,10240,0.08392852942148844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,8192,0.067740797996521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,10240,0.20618133544921874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,8192,0.1697216033935547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,7168,0.059731201330820716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,7168,0.1506303946177165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,512,0.015464533368746439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,6144,0.05084693431854248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,1024,256,0.01246506671110789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,6144,0.13022293249766032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,5120,0.043126400311787924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,5120,0.1115231990814209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,12288,0.15907519658406574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,4096,0.03523840109507243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,4096,0.09016533692677817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,16384,0.20796693166097008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,3584,0.031038933992385866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,65536,0.8227935791015625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,3584,0.08025386333465576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,3072,0.027030400435129803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,3072,0.06809919675191244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,10240,0.13265600204467773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,6144,0.08231253623962402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,2560,0.023031467199325563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,2560,0.056037334601084385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,2048,0.019320533672968546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,2048,0.04434666633605957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,7168,0.09705493450164795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,1536,0.015424000223477683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,8192,0.109443203608195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,1536,0.03526826699574788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,1024,0.011500799655914306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,1024,0.02791680097579956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,5120,0.07025706768035889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,768,0.009683199723561605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,768,0.024694399038950602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,3072,0.044446933269500735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,512,0.007906133433183034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,4096,0.05730453332265219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,512,0.021835732460021972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,256,0.006085333228111267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,256,0.02002133329709371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,1536,0.025655466318130492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,128,0.005157333115736643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,2560,0.039255468050638835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,128,0.017893334229787193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,3584,0.051482665538787845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,64,0.004628266890843709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,64,0.017568000157674155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,768,32,0.00486826648314794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,768,0.016261333227157594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,768,32,0.017228800058364867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,1024,0.019751467307408652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,2048,0.03197973370552063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,16384,0.09223253726959228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,65536,0.35193281173706054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,16384,0.2974207878112793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,12288,0.06953386465708414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,12288,0.22698453267415367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,65536,1.155908203125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,10240,0.059501866499582924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,10240,0.19194666544596356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,10240,0.10836160182952881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,8192,0.0477343996365865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,8192,0.15748480161031086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,7168,0.042224001884460446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,7168,0.13974720637003582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,256,0.010629333058993022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,6144,0.03554666837056478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,6144,0.12252480189005535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,512,0.013821867108345032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,5120,0.030407466491063434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,768,128,0.009431466460227966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,5120,0.10392959912618001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,16384,0.17225707372029622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,65536,0.652835210164388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,4096,0.025231999158859254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,3584,0.022087466716766358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,4096,0.08486293156941732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,3584,0.07361493110656739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,12288,0.13085760275522867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,3072,0.018888533115386963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,3072,0.06242560148239136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,2560,0.016525866587956746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,2560,0.05071359872817993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,8192,0.08864426612854004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,2048,0.013611732920010885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,2048,0.040380799770355226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,7168,0.07900266647338867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,1536,0.011181867122650147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,1536,0.03143253326416016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,6144,0.06774506568908692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,1024,0.008642133076985676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,1024,0.026009599367777508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,5120,0.057042133808135984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,768,0.007522133489449819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,768,0.0231989324092865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,4096,0.047533865769704184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,512,0.006233599781990051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,512,0.020657066504160562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,3072,0.03735573291778564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,256,0.005201066533724466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,2560,0.03234240015347799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,256,0.01801066597302755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,128,0.004490666588147481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,128,0.017026132345199584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,128,0.008540800213813782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,64,0.004182399809360504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,64,0.01679146687189738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,512,32,0.004251733422279358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,3584,0.042232533295949296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,512,32,0.016586666305859886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,65536,0.21963626543680825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,1536,0.021134932835896812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,16384,0.06364159981409709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,2048,0.02658453385035197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,16384,0.2865941365559896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,1024,0.01636799971262614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,65536,1.0832832336425782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,12288,0.05110506614049276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,768,0.014210133751233419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,10240,0.046692268053690596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,12288,0.21953172683715821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,10240,0.1841109275817871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,8192,0.033614933490753174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,512,0.011732266346613566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,8192,0.15053866704305013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,7168,0.030612266063690184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,7168,0.13345173199971516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,6144,0.02803093393643697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,6144,0.11605013211568196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,512,256,0.00936853289604187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,5120,0.025464532772699992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,5120,0.09942613442738851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,65536,0.584063975016276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,16384,0.15571734110514324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,4096,0.018847999970118205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,4096,0.07992213567097982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,12288,0.11826986471811932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,3584,0.016976000865300496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,10240,0.09976960023244222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,3584,0.06924586296081543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,3072,0.015460266669591268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,3072,0.05731306473414103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,8192,0.07959573268890381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,2560,0.013392000397046407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,7168,0.07053546905517578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,2560,0.04659946759541829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,2048,0.01138879954814911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,2048,0.03622613350550334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,6144,0.06037439902623495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,1536,0.009168000022570292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,1536,0.02850240071614583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,5120,0.051006933053334556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,1024,0.007160533467928569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,1024,0.024077866474787393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,768,0.00602346658706665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,768,0.02167466680208842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,4096,0.041954131921132405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,512,0.005025066435337067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,512,0.019419733683268228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,3072,0.03299199938774109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,256,0.004217599829037985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,256,0.01743146578470866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,2048,0.022887466351191203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,128,0.003737599899371465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,128,0.01653866668542226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,1024,0.014266666769981385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,64,0.003421866645415624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,64,0.015889066457748412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,256,32,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,256,32,0.01596799989541372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,2560,0.028859732548395793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,3584,0.038127998510996505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,65536,0.20461227099100748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,16384,0.05944000085194906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,16384,0.2792341232299805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,1536,0.018475733200709023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,768,0.012116266290346782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,12288,0.04023466507593791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,65536,1.079968007405599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,10240,0.034082134564717606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,12288,0.21312319437662758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,10240,0.18095040321350098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,8192,0.02775893410046895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,8192,0.14689812660217286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,256,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,7168,0.024691200256347655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,512,0.010218666990598042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,7168,0.13111999829610188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,6144,0.02234986623128255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,256,128,0.007469866673151653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,6144,0.11427946885426839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,5120,0.017899733781814576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,5120,0.09702613353729247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,65536,0.5665002822875976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,16384,0.15010132789611816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,4096,0.010346666971842448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,4096,0.0775221347808838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,12288,0.1149066686630249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,3584,0.009408000111579894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,3584,0.06725119749704997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,3072,0.00925546685854594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,10240,0.09693546295166015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,3072,0.05518293380737305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,2560,0.008138666550318401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,2560,0.043961600462595625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,8192,0.0782037337621053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,2048,0.006477866570154827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,2048,0.034468265374501546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,7168,0.0687935988108317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,1536,0.005740800003210703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,1536,0.029315199454625445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,6144,0.05886613527933756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,1024,0.004953599969546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,5120,0.0497439980506897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,1024,0.024228266874949136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,768,0.004455466568470001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,768,0.021142399311065672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,4096,0.04013226826985677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,512,0.003969066590070724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,512,0.019350399573644005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,3072,0.03146026730537414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,256,0.003522133330504099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,256,0.016701867183049522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,3584,0.0362773338953654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,128,0.0032416000962257386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,128,0.01594239970048269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,2048,0.02167146603266398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,64,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,2560,0.026411734024683636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,64,0.01563093364238739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,128,32,0.003293866664171219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,128,32,0.01544319987297058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,65536,0.1969578742980957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,16384,0.05247146685918173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,16384,0.2799040158589681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,12288,0.041867733001708984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,65536,1.0749652862548829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,12288,0.21136852900187172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,10240,0.03522773186365764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,8192,0.02951146761576335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,768,0.011168000102043153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,10240,0.17829972902933758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,7168,0.026748800277709962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,1024,0.013265066345532737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,8192,0.1460960070292155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,6144,0.02156053384145101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,7168,0.12891733646392822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,5120,0.016242133577664693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,6144,0.11215893427530925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,4096,0.00867306689421336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,5120,0.09493760267893472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,3584,0.008089600006739299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,4096,0.07518080075581869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,3584,0.06535786787668864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,3072,0.00855466624101003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,2560,0.007520000139872233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,3072,0.05336533387502035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,2048,0.006776533524195353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,2560,0.04250133434931437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,1536,0.017425066232681273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,1536,0.005734399954477946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,2048,0.033070933818817136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,1536,0.028118399779001872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,1024,0.00462719996770223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,1024,0.023613866170247397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,768,0.00417493333419164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,768,0.021699200073877968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,512,0.018936532735824584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,512,0.0037962667644023894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,256,0.003402666747570038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,256,0.01746666630109151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,128,0.003366400053103765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,64,0.003033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,128,0.015845333536465965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,64,0.015517866611480713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,64,32,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,64,32,0.015177599589029946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,16384,0.051252265771230064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,65536,0.18979627291361492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,12288,0.040140799681345624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,10240,0.03418240149815877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,16384,0.27884480158487956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,12288,0.21102399826049806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,8192,0.02834560076395671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,10240,0.17847572962443034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,7168,0.025568000475565594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,512,0.009304533402125042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,256,0.007421866556008657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,6144,0.022793600956598915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,8192,0.1454047997792562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,7168,0.1290079991022746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,5120,0.01590826710065206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,6144,0.1111029307047526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,4096,0.008405333757400513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,5120,0.09459520181020101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,3584,0.007417599856853485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,3072,0.006951466699441274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,65536,1.0724149068196616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,3072,0.05221013228098551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,4096,0.07490560213724771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,3584,0.0634389321009318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,2560,0.006116266548633576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,2560,0.04144533475240071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,2048,0.005544533332188925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4096,128,128,0.006541866560777028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,2048,0.032407466570536295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,1536,0.005130666494369507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,1024,0.004220800101757049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,1536,0.027319467067718504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,1024,0.023770666122436522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,768,0.0038773333032925926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,768,0.02095573345820109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,512,0.003562666724125544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,256,0.0032042667269706728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,512,0.019036799669265747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,128,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,256,0.016985599199930826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,64,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,128,0.0159850666920344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4096,32,32,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,64,0.015582933028539022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4096,32,32,0.015056000153223673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,10240,1.9503669738769531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,12288,2.3423189798990887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,12288,2.9008415222167967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,16384,3.39422607421875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,10240,3.86231689453125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,12288,4.469605509440104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,8192,1.618930180867513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,16384,6.02168935139974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,8192,2.9958773295084637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,7168,1.4288682301839193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,7168,2.6420799255371095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,6144,1.2234816233317056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,16384,3.8832351684570314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,6144,2.2821121215820312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,10240,2.4874593098958333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,5120,1.0666624069213868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,5120,1.9098805745442706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,8192,2.050714619954427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,4096,0.8579551696777343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,4096,1.481288528442383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,3584,0.707913589477539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,7168,1.7813791910807293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,3584,1.3166207631429037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,3584,0.900546137491862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,3072,0.6296938578287761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,6144,1.3692522684733073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,3072,1.190835189819336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,2560,0.9828213373819986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,2560,0.5246111869812011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,5120,1.210546112060547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,2048,0.7737173080444336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,2048,0.4618517239888509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,1536,0.34541546503702797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,1536,0.6226687749226888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,4096,1.01571839650472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,1024,0.38164265950520837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,1024,0.26983572642008463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,768,0.30274454752604163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,768,0.21839466094970703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,512,0.22003413836161295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,512,0.16720213890075683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,3072,0.7223434448242188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,256,0.143014399210612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,256,0.13108800252278646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,128,0.11718186537424724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,2560,0.615715217590332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,128,0.12120107014973958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,64,0.11603946685791015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,64,0.1204032023747762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,2048,0.5223829269409179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,65536,32,0.1129418690999349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,65536,32,0.11878293355305988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,1536,0.4228810628255208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,1024,0.3231594721476237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,768,0.29126294453938806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,512,0.23613120714823405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,256,0.20019733111063637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,16384,1.4724299112955728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,16384,0.833135986328125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,65536,128,0.18692053159077962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,16384,0.9446005503336588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,12288,0.6654442469278972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,12288,1.128820292154948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,10240,0.585095469156901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,10240,0.916705067952474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,65536,3.5583114624023438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,8192,0.7638357162475586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,8192,0.4363402684529622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,8192,0.5456234614054363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,65536,4.091037750244141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,7168,0.632862917582194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,7168,0.3908810615539551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,6144,0.36084372202555337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,6144,0.5878623962402344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,6144,0.3661696116129557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,5120,0.440447998046875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,5120,0.28917865753173827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,4096,0.24919145901997886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,4096,0.3496042569478353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,4096,0.24898026784261068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,3584,0.21774826049804688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,3584,0.3298442522684733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,65536,5.932549540201823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,3072,0.1883285363515218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,3072,0.2662303924560547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,12288,0.7168405532836915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,2560,0.2261194705963135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,2560,0.15470399856567382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,2560,0.1935263951619466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,2048,0.18342827161153158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,2048,0.12948799928029378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,10240,0.6009781519571941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,1536,0.14026986757914225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,1536,0.10488639672597248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,1024,0.09866773287455241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,1024,0.07998826503753662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,1024,0.09630293051401774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,768,0.07775359948476156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,768,0.07043626308441162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,512,0.05809280077616373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,512,0.05622506539026896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,7168,0.4083957354227702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,256,0.037910401821136475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,256,0.04896746476491292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,5120,0.2996405283610026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,128,0.03339626789093018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,128,0.04513706763585408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,3584,0.2190549373626709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,64,0.03068266709645589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,64,0.04491626818974813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,3072,0.19471999804178874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,16384,32,0.030552534262339275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,16384,32,0.04522560040156047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,2048,0.13990933100382488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,1536,0.11925973097483318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,768,0.0816480000813802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,512,0.06875306765238444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,256,0.0567573348681132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,16384,1.1159498850504557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,16384,0.6656703948974609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,16384,128,0.05265493392944336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,12288,0.554309336344401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,12288,0.7888512293497721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,65536,2.776373291015625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,10240,0.6810410817464192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,10240,0.49083627065022783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,8192,0.34558293024698894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,8192,0.5826954523722331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,65536,4.417779032389323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,7168,0.5003413200378418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,7168,0.3102677345275879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,6144,0.38566614786783854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,16384,0.7292074839274089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,6144,0.2636810620625814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,5120,0.3252671877543131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,5120,0.2234272003173828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,12288,0.5259466807047526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,4096,0.2613162676493327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,4096,0.179912535349528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,10240,0.44431467056274415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,3584,0.23326400121053062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,3584,0.16633493105570477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,65536,3.1535990397135416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,8192,0.3539509455362956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,3072,0.20267093976338707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,3072,0.18061013221740724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,2560,0.17080853780110677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,2560,0.13375040690104167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,2560,0.1286890665690104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,2048,0.14658026695251464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,7168,0.3199680010477702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,2048,0.10453866322835285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,1536,0.10819946924845378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,1536,0.08507200082143149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,1024,0.07514346440633138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,1024,0.06645119984944661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,6144,0.27104107538859046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,768,0.059400534629821776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,768,0.056789334615071616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,5120,0.22781119346618653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,512,0.04448426564534505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,512,0.046855465571085615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,4096,0.18844159444173175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,256,0.03198080062866211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,256,0.040532267093658446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,3584,0.16783572832743326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,128,0.02550293405850728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,128,0.0371829350789388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,3072,0.1471776008605957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,64,0.024732800324757893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,64,0.03856213490168254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,12288,32,0.025243733326594037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,12288,32,0.03877439896265666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,1536,0.09042879740397135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,2048,0.1089290698369344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,1024,0.07105173269907633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,768,0.06217813491821289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,16384,0.912167485555013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,512,0.051926398277282716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,16384,0.5773962656656901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,256,0.0440778652826945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,12288,0.6668938954671224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,12288,0.45764907201131183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,12288,128,0.04095253149668376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,65536,2.390101369222005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,10240,0.3766144116719564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,10240,0.5597024281819661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,8192,0.4541141192118327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,8192,0.31333014170328777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,65536,3.821854909261068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,7168,0.39869759877522787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,7168,0.2662357330322266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,7168,0.295085875193278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,6144,0.22834134101867676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,6144,0.33785813649495444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,5120,0.276476796468099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,5120,0.20932480494181313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,4096,0.228330659866333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,4096,0.16664212544759113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,16384,0.6046431859334309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,3584,0.2048192024230957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,3584,0.15655466715494792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,12288,0.43920106887817384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,3072,0.16940800348917645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,10240,0.3723594665527344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,65536,2.5596628824869794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,3072,0.13041706879933673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,2560,0.1808085282643636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,2560,0.11322346528371174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,8192,0.300546137491862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,2048,0.11712213357289632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,2048,0.09284906387329102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,1536,0.091539200146993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,1536,0.07719573179880777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,1536,0.07688000202178955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,1024,0.06437866687774658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,6144,0.22738240559895834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,1024,0.05985813140869141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,768,0.05160640080769857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,5120,0.19441386858622234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,768,0.05192960103352865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,4096,0.16262933413187664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,512,0.04027946790059407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,512,0.04475413163503011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,3584,0.14306772549947103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,256,0.028227200110753376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,256,0.03594026565551758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,3072,0.12543360392252606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,128,0.02221333384513855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,128,0.03315733273824056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,128,0.03554346561431885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,64,0.019720532496770225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,2560,0.1086464007695516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,64,0.03358826637268066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,2048,0.0932266632715861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,10240,32,0.020362667242685952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,10240,32,0.03403840065002441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,1024,0.05973653395970663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,768,0.05303786595662435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,16384,0.6753386815388998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,16384,0.4810485204060872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,512,0.04485226472218831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,65536,1.9333087921142578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,12288,0.5421183904012044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,12288,0.37355305353800455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,10240,256,0.037887998421986896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,10240,0.3450720151265462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,10240,0.45175787607828777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,8192,0.26761385599772136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,8192,0.3461930592854818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,65536,3.1295616149902346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,7168,0.30124588012695314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,7168,0.23954453468322753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,6144,0.26047040621439616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,6144,0.19706133206685383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,5120,0.22014187177022299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,5120,0.1644437313079834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,16384,0.4886058807373047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,4096,0.18444587389628092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,65536,2.184190877278646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,12288,0.3689909299214681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,4096,0.14528212547302247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,3584,0.15897812843322753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,10240,0.3035242716471354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,3584,0.12459839979807537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,3584,0.11824320157368977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,3072,0.13508373896280926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,3072,0.10835946400960286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,2560,0.11448746522267658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,2560,0.09427200158437093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,8192,0.24548160235087074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,2048,0.09364586671193441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,2048,0.0794538656870524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,7168,0.21851305961608886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,1536,0.07177600065867105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,1536,0.06625813245773315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,1024,0.051436801751454674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,1024,0.052475734551747644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,6144,0.1868874708811442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,768,0.04188799858093262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,768,0.04550506671269734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,5120,0.1607338587443034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,512,0.03204693396886189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,512,0.038891732692718506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,512,0.037725865840911865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,256,0.0220853328704834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,256,0.032075732946395874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,4096,0.13217919667561848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,128,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,128,0.028818132479985555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,3072,0.10474026997884114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,64,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,64,0.029684267441431683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,2560,0.08988373279571533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,8192,32,0.015681067109107973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,2048,0.07703253428141275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,8192,32,0.029763199885686237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,1536,0.0636629343032837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,1024,0.049993598461151124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,16384,0.6066336313883464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,768,0.04439786672592163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,16384,0.43473386764526367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,12288,0.4441354751586914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,12288,0.33047574361165366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,256,0.03165546655654907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,65536,1.7706432342529297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,65536,1.8594100952148438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,10240,0.3730389277140299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,10240,0.278767999013265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,10240,0.2870400110880534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,8192,0.23234559694925944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,8192,0.343832524617513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,65536,2.7822303771972656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,8192,128,0.02911253372828166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,7168,0.2646944046020508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,7168,0.19944532712300617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,6144,0.23310720125834145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,6144,0.1695679982503255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,5120,0.18984746932983398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,4096,0.15307733217875164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,5120,0.15701440175374348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,4096,0.12361386617024739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,16384,0.4311221440633138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,3584,0.13406933148701985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,3584,0.10848426818847656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,12288,0.31997760136922204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,3072,0.1156010627746582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,3072,0.09550506273905436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,8192,0.2217631975809733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,2560,0.0974783976872762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,7168,0.19389653205871582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,2560,0.08202880223592122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,6144,0.16871679623921712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,2048,0.07869333426157633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,2048,0.06899627049763998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,5120,0.14285866419474286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,1536,0.06012586752573649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,1536,0.055864532788594566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,1024,0.04229973157246907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,1024,0.04366613229115804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,4096,0.11951786677042645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,768,0.03402773141860962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,768,0.03822933435440064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,768,0.040858666102091476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,512,0.025240532557169598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,512,0.03328106602032979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,256,0.01785279909769694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,256,0.029308799902598066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,3584,0.1061791976292928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,128,0.01436906655629476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,128,0.02701440056165059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,3072,0.09341013431549072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,64,0.013692800203959146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,64,0.028113067150115967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,7168,32,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,2560,0.07957759698232016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,7168,32,0.027536000808080035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,2048,0.06957226594289144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,1536,0.0577621340751648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,1024,0.04596373240152995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,16384,0.5101056098937988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,16384,0.3950528144836426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,512,0.03375146786371867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,65536,1.5439125061035157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,12288,0.3854858716328939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,256,0.02891626755396525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,12288,0.3024863878885905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,65536,2.327025095621745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,10240,0.3469194730122884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,10240,0.2539509296417236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,8192,0.2191989262898763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,8192,0.272161070505778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,7168,128,0.026524800062179565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,7168,0.22887999216715493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,7168,0.18362986246744792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,6144,0.19757547378540039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,6144,0.1599125385284424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,5120,0.16342506408691407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,5120,0.13290879726409913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,16384,0.3728447914123535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,65536,1.6140501658121746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,4096,0.13383893966674804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,4096,0.10413973331451416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,12288,0.280293337504069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,4096,0.13664639790852864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,10240,0.24015679359436035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,3584,0.11838080088297527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,3584,0.10134080251057942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,3072,0.10446186860402425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,8192,0.18927253087361653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,3072,0.08963519732157389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,3072,0.08088640371958414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,2560,0.08815999825795492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,2560,0.078875732421875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,2560,0.0705941359202067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,2048,0.07069439888000488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,2048,0.06794453461964925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,1536,0.05526080131530762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,7168,0.16786346435546876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,1536,0.056194134553273524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,1024,0.04002133210500081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,1024,0.04456426699956258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,768,0.03295573393503825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,768,0.03852266470591227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,512,0.024971733490626015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,6144,0.14753066698710124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,512,0.03320639928181966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,256,0.017898666858673095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,256,0.0279423991839091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,5120,0.1257546663284302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,128,0.014138666788736978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,128,0.026341332991917925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,128,0.0227018674214681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,64,0.012133333086967468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,64,0.02576853235562642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,6144,32,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,6144,32,0.025937066475550337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,3584,0.0930186669031779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,2048,0.06053119897842407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,1536,0.05055466492970785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,1024,0.039579733212788897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,768,0.03483946720759074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,65536,1.8753088633219401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,512,0.029308799902598066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,16384,0.4234890619913737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,16384,0.3522240002950033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,6144,256,0.024807467063268026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,12288,0.3222559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,12288,0.279908275604248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,65536,1.3803797403971354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,12288,0.24450559616088868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,10240,0.2665130615234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,10240,0.2332085291544596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,8192,0.18312106132507325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,8192,0.22335467338562012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,8192,0.1682410717010498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,7168,0.19094187418619793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,7168,0.16051947275797526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,6144,0.16272427241007487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,6144,0.14074026743570964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,5120,0.13697919845581055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,5120,0.11847573121388752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,4096,0.11185706456502278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,4096,0.09882559776306152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,16384,0.3215797424316406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,3584,0.09832320213317872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,3584,0.08815466562906901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,10240,0.2025013287862142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,65536,1.3155914306640626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,3072,0.08473066488901773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,2560,0.07178133328755697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,3072,0.09474559624989828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,7168,0.1484021345774333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,2560,0.07029439608256022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,2048,0.05936319828033447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,6144,0.12826240062713623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,2048,0.05986560185750326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,1536,0.046087467670440675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,1536,0.04945493141810099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,5120,0.10760107040405273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,1024,0.03349013328552246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,1024,0.038423466682434085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,768,0.02658560077349345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,768,0.033326933781305954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,4096,0.08979520003000895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,512,0.020371200640996297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,512,0.02962239980697632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,512,0.026040534178415935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,256,0.014460800091425577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,256,0.02524799903233846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,3584,0.0800383965174357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,128,0.01071573297182719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,128,0.022163200378417968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,3072,0.07128000259399414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,2560,0.0618282675743103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,64,0.009225599964459737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,64,0.022272000710169472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,5120,32,0.009451733032862345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,2048,0.05301440159479777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,5120,32,0.022348799308141074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,1536,0.044565331935882566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,1024,0.03522239923477173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,16384,0.3380832036336263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,768,0.030972800652186078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,16384,0.3045728047688802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,12288,0.25203946431477864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,256,0.021461333831151327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,12288,0.23200319608052572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,65536,1.106696573893229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,65536,1.193882624308268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,65536,1.491002655029297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,10240,0.21293973922729492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,10240,0.19601173400878907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,8192,0.17296853065490722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,10240,0.18853227297465008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,7168,0.14980799357096355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,8192,0.17936746279398602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,7168,0.14445439974466961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,6144,0.1291925350824992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,6144,0.12765653133392335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,5120,0.10855573018391926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,5120,0.10665280024210613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,4096,0.08973973592122396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,4096,0.0899786631266276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,4096,0.07798293431599936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,5120,128,0.019509333372116088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,3584,0.07953386306762696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,3584,0.07796906630198161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,3072,0.06737173398335775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,3072,0.06963733037312826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,16384,0.26725972493489586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,2560,0.05729706684748331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,12288,0.2013706684112549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,2560,0.0610975980758667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,2048,0.04699093500773112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,2048,0.051361068089803064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,8192,0.13913920720418294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,1536,0.03648853302001953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,7168,0.12292160193125408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,1536,0.04136213461558024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,6144,0.10729173024495442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,1024,0.02671146591504415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,5120,0.09043093522389731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,1024,0.033655468622843424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,768,0.021834667523701987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,768,0.02967360019683838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,3584,0.06764159997304281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,512,0.016795732577641807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,512,0.025890133778254193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,512,0.021627734104792275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,256,0.01165013313293457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,256,0.022836265961329143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,3072,0.058650668462117514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,128,0.00876693328221639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,128,0.020657066504160562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,2560,0.05158079862594604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,64,0.00769706666469574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,64,0.020363734165827433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,4096,32,0.008147199948628742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,2048,0.04441386858622233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,4096,32,0.020537600914637247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,1536,0.03668373425801595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,1024,0.029150933027267456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,16384,0.31914558410644533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,768,0.025293866793314617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,65536,1.1442400614420571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,65536,1.3910719553629556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,16384,0.2989376068115234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,12288,0.2431392033894857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,10240,0.20188159942626954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,12288,0.23853866259256998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,10240,0.19544960657755533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,8192,0.17033599217732748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,256,0.01800000071525574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,8192,0.15696746508280437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,4096,128,0.016396799683570863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,7168,0.14628480275472006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,7168,0.13837653795878094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,6144,0.12635200023651122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,6144,0.12003093560536701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,5120,0.10410880247751872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,5120,0.1025055964787801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,12288,0.20682454109191895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,4096,0.08449386755625407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,4096,0.08530133565266927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,65536,1.1223487854003906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,6144,0.10798506736755371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,3584,0.08425172964731852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,3584,0.07706240018208822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,16384,0.2743114789326986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,3072,0.06564373175303141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,3072,0.067795197168986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,2560,0.055003734429677334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,10240,0.17520106633504232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,2560,0.05838506619135538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,2048,0.04511679808298747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,2048,0.04880853494008382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,7168,0.12428159713745117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,1536,0.03504000107447307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,1536,0.040822398662567136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,8192,0.14091307322184246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,1024,0.02597973346710205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,1024,0.032824534177780154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,5120,0.09044053554534912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,768,0.021041067441304524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,768,0.029262934128443403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,3072,0.05754453341166178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,512,0.016641066471735636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,4096,0.07379199663798014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,512,0.02611733277638753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,256,0.011307733257611592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,256,0.022668800751368203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,1536,0.033062400420506795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,128,0.00848426620165507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,128,0.020536533991495767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,2560,0.05015679995218912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,64,0.007316266496976216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,64,0.020388267437616982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3584,32,0.00787306676308314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3584,32,0.020488532384236653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,3584,0.06568320194880167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,768,0.021203200022379555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,2048,0.04177173376083374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,65536,1.0500277201334636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,65536,1.0280586878458657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,16384,0.26363733609517415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,16384,0.29853973388671873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,1024,0.026602667570114136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,12288,0.19913172721862793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,12288,0.21569280624389647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,10240,0.16688426335652667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,10240,0.176693328221639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,10240,0.1421130657196045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,8192,0.13210346698760986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,8192,0.13975680669148763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,7168,0.11778879960378011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,7168,0.12242133617401123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,256,0.014453333616256715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,6144,0.10266346931457519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,6144,0.10640959739685059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,65536,0.8382389068603515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,512,0.017307732502619425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,5120,0.08497599760691324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3584,128,0.01243946651617686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,16384,0.21543679237365723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,5120,0.09023040135701497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,4096,0.06804373264312744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,4096,0.07605439821879069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,12288,0.16477546691894532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,3584,0.060651731491088864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,3584,0.06852800051371256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,3584,0.054771200815836585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,3072,0.052342398961385095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,3072,0.06070826848347982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,2560,0.044344532489776614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,2560,0.05210773150126139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,8192,0.11321173508961994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,2048,0.03631680011749268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,2048,0.043917866547902425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,7168,0.10079253514607747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,1536,0.028495999177296956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,1536,0.03682986497879028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,6144,0.08670506477355958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,1024,0.020750933885574342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,1024,0.030239999294281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,5120,0.07311039765675863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,768,0.017017600933710735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,4096,0.061178668340047204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,768,0.02100906570752462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,768,0.02682346701622009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,512,0.013212800025939941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,512,0.02330026626586914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,512,0.017812265952428182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,256,0.009081600109736125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,256,0.0206112007300059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,128,0.007250133156776428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,128,0.01956160068511963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,3072,0.048376532395680745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,64,0.0064181332786877945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,3072,32,0.006660266717274983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,64,0.019203199942906698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,3072,32,0.019099734226862588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,2560,0.042124799887339276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,2048,0.03639253377914429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,16384,0.20862933794657387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,1536,0.030513066053390502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,65536,0.8284917195638022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,16384,0.24152746200561523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,65536,0.9121877034505209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,12288,0.1728480021158854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,12288,0.18608214060465494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,10240,0.13400640487670898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,10240,0.1568618615468343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,8192,0.10652906894683838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,8192,0.12805120150248211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,1024,0.024280534187952677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,7168,0.09376213550567628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,7168,0.11334719657897949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,256,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,3072,128,0.013912533720334372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,6144,0.0826090653737386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,6144,0.09762667020161947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,5120,0.06799573103586833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,5120,0.08373653093973796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,12288,0.1718506654103597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,4096,0.05563413302103678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,4096,0.06919573148091634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,65536,0.909771728515625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,3584,0.057144534587860105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,3584,0.07020053068796793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,16384,0.2195711930592855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,3072,0.043229866027832034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,3072,0.05495466788609823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,10240,0.1491487979888916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,6144,0.08543039957682291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,2560,0.03648853302001953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,2560,0.04710826476414998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,2048,0.030716800689697267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,2048,0.04021439949671428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,7168,0.09989973704020182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,1536,0.024154667059580484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,1536,0.03389439980189006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,8192,0.11462720235188802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,1024,0.01798506577809652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,1024,0.028171734015146895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,5120,0.0730410655339559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,768,0.014881066481272378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,768,0.025538132588068647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,3072,0.04605120023091634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,512,0.011618133385976155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,4096,0.05976959864298502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,512,0.022201599677403767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,256,0.00843946635723114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,256,0.020090667406717937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,1536,0.026557866732279462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,128,0.006916266679763794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,128,0.01890773375829061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,2560,0.040897067387898764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,64,0.006306133170922597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,64,0.018759467204411826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,3584,0.053607467810312906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2560,32,0.00643093337615331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2560,32,0.01886720061302185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,1024,0.021499733130137123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,2048,0.03391573429107666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,768,0.01690666675567627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,65536,0.6696330388387044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,16384,0.16446399688720703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,16384,0.23886399269104003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,12288,0.141592534383138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,65536,0.8237152099609375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,12288,0.17138986587524413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,12288,0.124726398785909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,10240,0.1060373306274414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,10240,0.14283946355183919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,8192,0.08506986300150553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,8192,0.1160650650660197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,256,0.01184213360150655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,7168,0.07555733521779379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,128,0.010081066687901815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,7168,0.10238080024719239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,7168,0.07591040134429931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,6144,0.06473066806793212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,6144,0.08850346406300863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,5120,0.05457066694895426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,5120,0.07546666463216146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,5120,0.05572266578674316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,4096,0.04480746587117513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,16384,0.16384746233622233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,4096,0.06246826648712158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2560,512,0.013901866475741067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,65536,0.6480469385782878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,3584,0.0394048015276591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,3584,0.055137066046396885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,3072,0.038396799564361574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,3072,0.04823466539382935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,2560,0.03072426716486613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,2560,0.04182933171590169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,2048,0.024550400177637734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,10240,0.10637546380360921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,2048,0.03581546545028687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,2048,0.027932800849278766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,1536,0.019811199108759562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,1536,0.030052266518274945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,1024,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,8192,0.0861290693283081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,1024,0.02550826668739319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,768,0.012116266290346782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,768,0.02300800085067749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,6144,0.0654752016067505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,512,0.00957973301410675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,512,0.020980266729990642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,4096,0.04716266791025798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,256,0.007328000168005626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,3584,0.04159040053685506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,256,0.019282132387161255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,128,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,3072,0.03713599840799968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,2560,0.03257173299789429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,128,0.017884800831476845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,64,0.005406933526198069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,64,0.017643733819325765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,2048,32,0.005767466624577841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,2048,32,0.01798293391863505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,1536,0.023180800676345825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,1024,0.01837973395983378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,65536,0.5223445256551107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,768,0.015940266847610473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,16384,0.12855359713236492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,65536,0.7502304077148437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,16384,0.2091829299926758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,12288,0.09785599708557129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,10240,0.08184746901194254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,12288,0.15258132616678874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,10240,0.12962666352589924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,8192,0.0661525328954061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,8192,0.10613546371459961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,512,0.013739732901255288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,7168,0.05909226735432943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,7168,0.0939349333445231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,256,0.011828266580899556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,2048,128,0.010758399963378906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,6144,0.05039466619491577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,6144,0.08104320367177328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,5120,0.04266453186670939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,5120,0.0689141352971395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,12288,0.1168394645055135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,4096,0.03540053367614746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,4096,0.055986134211222324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,65536,0.5775957107543945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,16384,0.15181867281595868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,3584,0.031649067004521685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,3584,0.048794666926066085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,3072,0.02696853280067444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,3072,0.04257173140843709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,10240,0.09755626519521078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,2560,0.02307093342145284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,2560,0.03723520040512085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,6144,0.059077334403991696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,2048,0.019283199310302736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,8192,0.08032853603363037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,7168,0.06937173207600912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,2048,0.03261546691258748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,1536,0.015762133399645488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,1536,0.02811093330383301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,5120,0.05256213347117106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,1024,0.011570133765538533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,1024,0.023360000054041544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,768,0.009729066491127014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,768,0.021269333362579346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,3072,0.033394134044647215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,512,0.007806933422883351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,4096,0.04330026706059774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,512,0.01999680002530416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,256,0.006135466694831848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,256,0.018396800756454466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,3584,0.039122132460276286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,2560,0.029491200049718218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,128,0.005205333232879639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,1536,0.01962560017903646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,128,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,64,0.004609066744645437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,64,0.0168938676516215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1536,32,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1536,32,0.016787199179331462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,65536,0.34063679377237954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,65536,0.6634421030680339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,16384,0.08847253322601319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,16384,0.17606080373128255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,2048,0.02464853326479594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,12288,0.06757439772288004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,12288,0.13695893287658692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,1024,0.015983999768892924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,10240,0.05568000078201294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,768,0.013482667009035745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,10240,0.11621332963307698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,8192,0.0451904018719991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,8192,0.09451200167338053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,256,0.010084266463915508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,7168,0.040319999059041337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,7168,0.08390613396962485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,7168,0.052414933840433754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,6144,0.034983468055725095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,512,0.01192639966805776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,65536,0.4289557456970215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,6144,0.07210880120595296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,5120,0.029552000761032104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,5120,0.05971946716308594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,5120,0.040563201904296874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,4096,0.024036266406377158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,4096,0.032400000095367434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,4096,0.04811840057373047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,3584,0.02118826707204183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,16384,0.1128000020980835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,3584,0.04254613320032756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,3072,0.01857173244158427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1536,128,0.008872532844543457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,3072,0.03752426703770955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,2560,0.01646080017089844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,2560,0.032986666758855185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,2048,0.013909332950909934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,2048,0.028707200288772584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,2048,0.01954560081164042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,1536,0.011313066879908244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,12288,0.08672426541646322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,1536,0.026225066184997557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,1024,0.008620799581209818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,1024,0.021835732460021972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,10240,0.0716970682144165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,768,0.0073738664388656614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,768,0.020473599433898926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,8192,0.05881919860839844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,512,0.006183466811974844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,512,0.01918399930000305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,6144,0.04601600170135498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,256,0.005046399931112925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,256,0.017735467354456583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,3584,0.029180800914764403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,128,0.00447680006424586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,128,0.016612266500790916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,3072,0.025897600253423053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,64,0.0040853333969910945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,2560,0.022711465756098427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,64,0.016080000003178916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,1024,32,0.004192000130812327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,1024,32,0.01649066706498464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,65536,0.2641770680745443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,1536,0.016059733430544534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,16384,0.06982399622599283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,1024,0.01304746667544047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,65536,0.6326186498006184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,16384,0.16665172576904297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,12288,0.05658239920934042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,12288,0.13120213349660237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,768,0.01162453293800354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,10240,0.050182398160298666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,10240,0.11157546838124592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,8192,0.03583999872207642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,512,0.010115200281143188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,8192,0.05582293272018433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,8192,0.08893120288848877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,7168,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,7168,0.07932906945546468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,256,0.00878613293170929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,6144,0.03052373329798381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,6144,0.0690602699915568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,1024,128,0.007976533472537994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,5120,0.026959999402364092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,5120,0.0561738650004069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,65536,0.40495678583780925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,16384,0.10785600344340007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,4096,0.021203200022379555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,4096,0.04437333345413208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,12288,0.08071466286977133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,3584,0.01808213392893473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,3584,0.039228800932566324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,10240,0.06865920225779215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,3072,0.016245333353678386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,3072,0.03515093326568604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,2560,0.014882133404413859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,2560,0.030829866727193195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,7168,0.04915413459142049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,2048,0.011413333813349406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,2048,0.0269269327322642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,6144,0.043228801091512045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,1536,0.009659733374913533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,1536,0.02391466697057088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,1536,0.014726400375366211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,1024,0.007313066720962524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,1024,0.02116159995396932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,5120,0.037647998332977294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,768,0.006436266501744588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,768,0.019215999046961467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,512,0.00547626664241155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,512,0.018116267522176106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,4096,0.030705066521962483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,256,0.004556799928347269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,256,0.01686613361040751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,256,0.007956266899903615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,3584,0.027270400524139406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,128,0.004084266722202301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,128,0.016156799594561257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,3072,0.023800534009933472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,64,0.0036373332142829893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,768,32,0.0037994667887687682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,64,0.01569706698258718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,768,32,0.01585599978764852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,2560,0.02103360096613566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,65536,0.18315946261088054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,16384,0.05242346525192261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,16384,0.16285333633422852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,65536,0.5862261454264324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,12288,0.04857706626256307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,2048,0.017692800362904867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,12288,0.12621013323465985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,10240,0.043082666397094724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,1024,0.011915733416875202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,10240,0.10904213587443035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,768,0.010260267059008281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,8192,0.03188160061836243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,8192,0.08745493094126383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,512,0.009244799613952637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,7168,0.02900586724281311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,7168,0.07722346782684326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,6144,0.02574933369954427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,6144,0.0660426656405131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,768,128,0.0069248000780741375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,5120,0.024112000068028768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,5120,0.05391039848327637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,65536,0.35973758697509767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,16384,0.09641706943511963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,4096,0.017283199230829875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,4096,0.041716265678405764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,12288,0.0738602638244629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,3584,0.015913599729537965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,3584,0.03639359871546428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,3072,0.01434346636136373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,3072,0.033149866263071696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,10240,0.06305813392003377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,2560,0.012086400389671325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,8192,0.04991573492685954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,2560,0.029654399553934736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,2048,0.010171733299891154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,2048,0.025689599911371867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,7168,0.04464426835378011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,1536,0.008405333757400513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,1536,0.02323626677195231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,6144,0.039435732364654544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,1024,0.0061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,1024,0.020884267489115396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,5120,0.03402666648228963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,768,0.005666133264700572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,768,0.01919893423716227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,4096,0.027395200729370118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,512,0.004779733220736186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,512,0.017783466974894205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,3072,0.021575466791788737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,256,0.003960533440113068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,256,0.01633280018965403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,2048,0.016169599692026772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,1024,0.011055999994277954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,128,0.003705599904060364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,128,0.015442132949829102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,3584,0.02472426692644755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,64,0.0034346667428811393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,768,0.009642666578292847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,64,0.015326933066050211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,512,32,0.003483733286460241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,512,32,0.015799466768900552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,65536,0.11772480010986328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,2560,0.019019732872645058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,16384,0.03701440095901489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,65536,0.5566645304361979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,16384,0.1537941296895345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,12288,0.03324906627337138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,12288,0.11944746971130371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,1536,0.013851733009020487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,10240,0.029842134316762286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,10240,0.1029695987701416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,8192,0.02062186598777771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,8192,0.08253973325093587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,256,0.007604266703128815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,512,0.008516266942024231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,7168,0.019228800137837728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,7168,0.07084693113962809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,6144,0.01570026675860087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,6144,0.059349334239959715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,512,128,0.00680320014556249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,5120,0.013921067118644714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,5120,0.04724053144454956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,65536,0.33260374069213866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,4096,0.011195733149846395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,16384,0.08829120000203451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,4096,0.03834559917449951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,3584,0.010084266463915508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,3584,0.03422613143920898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,12288,0.06667413711547851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,3072,0.009193600217501322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,3072,0.029865600665410358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,10240,0.05729386806488037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,2560,0.008208000163237254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,2560,0.0269813338915507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,8192,0.04598613182703654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,2048,0.006611200173695882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,2048,0.024618667364120484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,7168,0.04105386734008789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,1536,0.0056415999929110205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,6144,0.03617279926935832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,1536,0.021995733181635536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,1024,0.004862933357556661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,1024,0.01909866730372111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,5120,0.03043839931488037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,768,0.0044501334428787235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,768,0.017547732591629027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,4096,0.025066665808359784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,512,0.003886933376391729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,512,0.01689066688219706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,3072,0.01957013408342997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,256,0.003489066660404205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,256,0.01593386630217234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,3584,0.02260266741116842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,128,0.003272533416748047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,128,0.015518933534622192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,2560,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,64,0.0030847998956839246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,64,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,256,32,0.0031957333286603295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,256,32,0.015266133348147073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,65536,0.10746133327484131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,2048,0.014845866958300272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,65536,0.5494698842366537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,16384,0.03203306595484416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,16384,0.14924480120340983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,1024,0.009841066598892213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,768,0.008712533116340637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,12288,0.025179733832677204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,12288,0.11586879889170329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,10240,0.021107200781504312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,10240,0.09932053089141846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,1536,0.012596266468365988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,8192,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,8192,0.07905279795328776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,512,0.0077354664603869125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,256,0.006841599941253662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,7168,0.01264639993508657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,7168,0.06860586802164713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,6144,0.015186132987340293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,256,128,0.005976533393065134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,6144,0.057152001063028965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,5120,0.013986133535703025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,5120,0.044649600982666016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,65536,0.32116800944010415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,4096,0.010698666175206501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,16384,0.08518293698628744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,4096,0.03582506577173869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,3584,0.00977066655953725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,3584,0.03308586676915486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,12288,0.0649621327718099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,3072,0.009019733468691508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,3072,0.02970133423805237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,10240,0.05511680046717325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,8192,0.045187199115753175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,2560,0.007189333438873291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,2560,0.02695786754290263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,7168,0.0404202659924825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,2048,0.006376533210277558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,6144,0.03491946856180827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,2048,0.023846399784088135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,1536,0.005515733361244201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,1536,0.021715199947357176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,5120,0.028998400767644244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,1024,0.00461760014295578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,1024,0.019215999046961467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,768,0.0042250668009122215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,768,0.01768746574719747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,4096,0.023769599199295045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,512,0.0037269333998362223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,512,0.017742933829625447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,3072,0.019038933515548705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,3584,0.021576533714930214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,256,0.0034346667428811393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,256,0.01565439999103546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,128,0.0031360000371932983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,128,0.015313067038853965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,2048,0.014410666624704995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,2560,0.016876800855000814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,64,0.002932266642649968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,128,32,0.0030154667794704436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,64,0.015363199512163797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,1024,0.009486933549245197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,128,32,0.015069866180419922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,65536,0.1032639980316162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,16384,0.029547733068466187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,12288,0.023727999130884806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,1536,0.012288000186284382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,16384,0.14770453770955402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,10240,0.01992320020993551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,12288,0.11315093040466309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,8192,0.011592533191045125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,65536,0.5420149485270183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,10240,0.09622186819712321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,7168,0.010333866874376933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,8192,0.07698240280151367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,6144,0.015175466736157736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,5120,0.008557867010434468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,7168,0.0658453345298767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,6144,0.05607253313064575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,4096,0.007877333462238312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,5120,0.04349546829859416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,3584,0.007890133559703827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,4096,0.03411200046539307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,3584,0.031498666604359946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,3072,0.006950399776299794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,2560,0.006415999929110209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,2048,0.005612800021966299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,3072,0.028274132808049517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,2560,0.02661973237991333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,2048,0.024678399165471397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,1536,0.00499839981396993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,1536,0.022168533007303873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,1024,0.004229333500067393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,1024,0.01936960021654765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,768,0.017730132738749186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,768,0.0039434666434923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,768,0.008344533046086629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,512,0.0035648000737031303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,512,0.016747732957204185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,256,0.0031850665807724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,256,0.01585706671079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,128,0.0029706666866938275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,128,0.015388799707094827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,64,0.0028138667345046996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,64,0.014882133404413859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,64,32,0.0029002666473388673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,64,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,512,0.007387733459472657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,65536,0.10076159636179607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,16384,0.028198399146397907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,12288,0.022550400098164877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,16384,0.14624640146891277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,10240,0.017280000448226928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,8192,0.009867733716964722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,12288,0.11231359640757244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,10240,0.09483199914296468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,7168,0.009566932916641235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,8192,0.0760416030883789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,6144,0.008373333017031352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,65536,0.5390623728434245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,7168,0.06465280055999756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,256,0.006602666775385539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,6144,0.05370133320490519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,5120,0.0077237332860628765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,4096,0.007050666709740956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,5120,0.04356693426767985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2048,128,128,0.005914666752020518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,4096,0.03386559883753459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,3584,0.006737066805362702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,3584,0.03126293420791626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,3072,0.0063967997829119366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,3072,0.02879146734873454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,2560,0.006180266539255777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,2560,0.025806933641433716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,2048,0.0054400001962979635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,2048,0.023593600591023764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,1536,0.004789333542188009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,1536,0.02181653380393982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,1024,0.004186666508515676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,1024,0.0190720001856486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,768,0.003978666663169861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,512,0.003656533360481262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,768,0.01791999936103821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,512,0.01726079980532328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,256,0.0032853332658608755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,128,0.0030847998956839246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,256,0.015895467003186545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,128,0.014962133765220643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,64,0.002887466549873352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,64,0.015053866306940713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2048,32,32,0.0029887999097506206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2048,32,32,0.014877866705258688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,12288,1.2302613576253256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,12288,1.4311829884847005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,16384,1.6061920166015624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,10240,1.855824025472005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,12288,2.187114715576172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,10240,1.0203829447428385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,16384,3.2691167195638022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,8192,0.8152693430582681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,8192,1.514289093017578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,8192,0.9189109166463216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,7168,0.70304749806722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,6144,0.649791971842448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,6144,1.125595728556315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,6144,0.6919071833292644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,7168,1.4450047810872397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,5120,0.5212650616963704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,5120,0.9398218790690104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,4096,0.4356959978739421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,4096,0.7384042739868164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,5120,0.5875008265177409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,16384,1.8591541290283202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,3584,0.3923263867696126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,3584,0.7100992202758789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,3072,0.37439467112223307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,3072,0.5786261240641276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,10240,1.1716693878173827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,3072,0.39621868133544924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,2560,0.28444585800170896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,2560,0.3384671847025553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,2560,0.5021375974019369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,2048,0.2513877391815186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,2048,0.43559681574503584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,1536,0.2940256118774414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,1536,0.1945525328318278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,1024,0.15490880012512206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,1024,0.23621759414672852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,7168,0.7983338673909505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,768,0.16368212699890136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,768,0.12650453249613444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,512,0.11976213455200195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,512,0.09285759925842285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,768,0.15044266382853191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,256,0.07982613245646158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,256,0.07846079667409261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,128,0.07257813612620036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,128,0.07239360014597575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,4096,0.47208960851033527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,3584,0.41809813181559247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,64,0.0661407987276713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,65536,32,0.057765332857767734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,64,0.07879786491394043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,65536,32,0.07217600345611572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,2048,0.2699786822001139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,1536,0.2214911937713623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,1024,0.16883093516031902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,16384,0.4158591906229655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,512,0.12526933352152508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,16384,0.6993322372436523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,256,0.10620693365732829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,65536,128,0.09687999884287515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,65536,1.757101821899414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,12288,0.33827521006266276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,12288,0.5196874618530274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,12288,0.3446549415588379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,8192,0.21664746602376303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,10240,0.4272447903951009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,10240,0.2816266695658366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,10240,0.30084479649861656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,8192,0.3708064079284668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,65536,2.023853810628255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,65536,3.08003412882487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,7168,0.20229652722676597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,7168,0.30473814010620115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,6144,0.2726133346557617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,6144,0.19439679781595867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,5120,0.22161386807759603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,5120,0.1494165261586507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,5120,0.16129919687906902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,4096,0.180239995320638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,4096,0.12320213317871094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,3584,0.15970560709635417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,3584,0.11408426761627197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,3072,0.13791146278381347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,3072,0.1007317304611206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,3072,0.09986240069071452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,2560,0.1166218678156535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,2560,0.08593599796295166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,16384,0.4549578666687012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,2048,0.10155519644419353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,2048,0.07539626757303873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,7168,0.21255572636922201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,8192,0.23469759623209635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,1536,0.07453546524047852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,1536,0.06007680098215738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,1536,0.06234026749928793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,1024,0.052042667071024576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,6144,0.1830176035563151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,1024,0.04931413332621257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,768,0.04321386814117432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,768,0.04396586815516154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,4096,0.1249077320098877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,512,0.032689066727956136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,3584,0.11123946507771809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,512,0.03789120117823283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,256,0.02290453314781189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,256,0.03272106647491455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,128,0.018011732896169027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,128,0.02956906755765279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,64,0.014994133512179056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,64,0.02923840085665385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,2560,0.08544960021972656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,16384,32,0.01581653356552124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,16384,32,0.02923626701037089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,2048,0.07315946420033773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,1024,0.04867946704228719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,65536,1.353572209676107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,768,0.04340693155924479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,512,0.03721706469853719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,256,0.031133866310119628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,65536,2.299847412109375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,16384,0.5109546661376954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,16384,0.3766751925150553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,12288,0.39427200953165686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,16384,128,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,12288,0.2498410701751709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,10240,0.21655359268188476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,10240,0.32896852493286133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,12288,0.2833642641703288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,8192,0.1722335974375407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,8192,0.2573781331380208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,7168,0.16060479482014972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,7168,0.24252586364746093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,7168,0.15929919878641766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,6144,0.1960063934326172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,6144,0.1400383949279785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,5120,0.16662933031717936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,5120,0.12441706657409668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,4096,0.13498454093933104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,4096,0.10494613647460938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,3584,0.11793706417083741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,3584,0.08967893123626709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,3584,0.0858346700668335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,3072,0.10258773167928059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,3072,0.07921493053436279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,16384,0.34460693995157876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,2560,0.0918837308883667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,2560,0.07524373531341552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,65536,1.5135050455729167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,10240,0.22617600758870443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,2048,0.09194453557332358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,2048,0.06207040150960287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,8192,0.1814282735188802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,1536,0.05674453179041544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,1536,0.0516981323560079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,1536,0.04822826782862345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,1024,0.040403199195861814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,1024,0.04287039836247762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,6144,0.13693547248840332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,1024,0.03835626840591431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,768,0.03323413332303365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,5120,0.11572373708089193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,768,0.03698026736577352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,512,0.02572480042775472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,256,0.01827413241068522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,512,0.03323093255360921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,512,0.029011199871699016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,4096,0.09636159737904867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,256,0.02818560004234314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,128,0.014133333166440328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,128,0.02635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,128,0.02261013388633728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,64,0.012040533622105916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,12288,32,0.012582400441169738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,64,0.02614826758702596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,12288,32,0.02488320072491964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,3072,0.07531413237253824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,16384,0.4286208152770996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,65536,1.143791961669922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,65536,1.2696202596028647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,16384,0.3000181198120117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,2560,0.06607786814371744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,65536,1.9457888285319012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,12288,0.39053866068522136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,12288,0.220086399714152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,2048,0.057513598601023355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,10240,0.1854357401529948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,10240,0.28562345504760744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,768,0.034058666229248045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,8192,0.21795412699381508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,8192,0.15769386291503906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,7168,0.19193493525187175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,7168,0.13613759676615397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,12288,256,0.024196267127990723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,7168,0.13690346082051594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,6144,0.16507840156555176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,6144,0.11693120002746582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,6144,0.11716907024383545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,5120,0.13897919654846191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,5120,0.10068799654642742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,4096,0.1123423973719279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,4096,0.08479039669036866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,3584,0.0998965342839559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,3584,0.07771413326263428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,3584,0.07304960091908773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,3072,0.08659733136494954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,3072,0.06970880031585694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,16384,0.29367574055989587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,2560,0.07345813115437826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,2560,0.062337064743041994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,10240,0.18694507280985515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,2560,0.05621866782506307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,2048,0.06027413209279379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,12288,0.23010026613871254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,2048,0.052570664882659913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,1536,0.047305599848429365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,1536,0.04495786825815837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,1536,0.0408021330833435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,1024,0.03425813515981038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,768,0.027412267525990804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,1024,0.03638933499654134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,8192,0.15338133176167806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,768,0.03208000063896179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,512,0.020805333058039346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,512,0.02878933350245158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,256,0.014402133226394654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,256,0.024574933449427287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,5120,0.09847466945648194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,128,0.011016533772150675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,128,0.02288320064544678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,4096,0.08160853385925293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,64,0.009160533547401428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,64,0.022457599639892578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,10240,32,0.009501866499582927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,10240,32,0.022188800573349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,3072,0.06482880115509033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,65536,1.4499231974283853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,2048,0.049428268273671465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,1024,0.03229333360989888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,65536,0.9561152140299478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,768,0.029546666145324706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,512,0.02489173412322998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,16384,0.33802452087402346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,16384,0.24070080121358237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,12288,0.2625909328460693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,16384,0.2551392078399658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,12288,0.18266666730244954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,256,0.021028266350428263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,10240,0.21267199516296387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,10240,0.15791680018107096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,8192,0.12533653577168782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,8192,0.17461759249369305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,7168,0.15569920539855958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,10240,128,0.01916159987449646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,7168,0.11206933657328289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,65536,1.0238346735636392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,7168,0.11076587041219074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,6144,0.1341759999593099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,6144,0.09922346274058023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,5120,0.08702826499938965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,5120,0.13170773188273113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,4096,0.08861440022786458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,4096,0.07375146547953287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,4096,0.0860543966293335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,3584,0.0783466657002767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,3584,0.06691946983337402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,3072,0.06792960166931153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,3072,0.05973653395970663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,3072,0.05358186562856039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,2560,0.058501334985097256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,2560,0.05249813397725424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,2048,0.04731839895248413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,2048,0.04501973390579224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,10240,0.15310719807942708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,12288,0.17980052630106608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,1536,0.037453866004943846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,8192,0.12396799723307292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,1536,0.0376362681388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,1024,0.026630399624506633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,1024,0.031575467189153036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,1024,0.027617067098617554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,768,0.021541333198547362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,6144,0.09599040349324545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,768,0.028961066404978437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,5120,0.0822271982828776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,512,0.016731733083724977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,512,0.025513599316279095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,512,0.021643733978271483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,256,0.012083199620246888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,3584,0.06070826848347982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,256,0.022012799978256226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,128,0.008932266632715862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,128,0.020424532890319824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,64,0.007565866907437642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,2560,0.04691733519236247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,64,0.0204202671845754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,8192,32,0.008042666812737782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,8192,32,0.020489599307378134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,2048,0.04110933144887288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,65536,0.9025407791137695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,65536,1.4094783782958984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,16384,0.3306175867716471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,1536,0.033868801593780515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,16384,0.2686101277669271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,12288,0.24462505976359047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,12288,0.19144959449768068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,10240,0.21647893587748207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,768,0.024884267648061117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,256,0.0176746666431427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,10240,0.1568554719289144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,8192,128,0.016371200482050575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,8192,0.12713279724121093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,8192,0.16828266779581708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,7168,0.14496426582336425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,7168,0.11311786969502766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,6144,0.12541653315226237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,6144,0.09980586369832357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,5120,0.1063050667444865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,5120,0.08592960039774576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,12288,0.1850015958150228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,4096,0.08507413069407145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,4096,0.07189866701761881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,65536,0.9848192214965821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,3584,0.08803199927012126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,3584,0.07178666591644287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,16384,0.24460159937540688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,6144,0.09532159964243571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,3072,0.0651253342628479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,3072,0.05758080085118612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,2560,0.055257598559061684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,2560,0.05034559965133667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,10240,0.1571605364481608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,2048,0.04594240188598633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,2048,0.0438645323117574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,7168,0.11407252947489421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,1536,0.036082132657368975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,1536,0.03707520167032878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,8192,0.12437226772308349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,1024,0.025860265890757246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,1024,0.030641067028045654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,5120,0.08186986446380615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,768,0.021547732750574748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,768,0.02784213423728943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,3072,0.05163733164469401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,512,0.016341333587964378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,512,0.025496532519658405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,4096,0.06682453155517579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,256,0.011796266833941142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,256,0.021636267503102623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,1536,0.030323199431101483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,128,0.008424533406893413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,128,0.02023573319117228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,3584,0.05894080003102621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,64,0.007315200070540111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,64,0.020558933417002358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,7168,32,0.007779199878374736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,2560,0.04560853242874145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,7168,32,0.02034133275349935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,768,0.020677334070205687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,65536,0.788362693786621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,65536,1.0722379048665365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,2048,0.037929598490397134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,16384,0.26313494046529134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,65536,0.758619753519694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,16384,0.21390825907389321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,12288,0.19883947372436522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,12288,0.1699530601501465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,10240,0.16646933555603027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,1024,0.02529279987017314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,10240,0.14359146753946941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,10240,0.12284586429595948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,8192,0.13218773206075032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,8192,0.11814293066660564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,7168,0.11662933031717937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,7168,0.10477546850840251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,6144,0.10276906490325928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,6144,0.08679893016815185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,256,0.013983999689420065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,512,0.0177130659421285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,7168,128,0.012284800410270691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,5120,0.08549119631449381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,5120,0.07505813439687094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,16384,0.18568639755249022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,12288,0.1420394738515218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,5120,0.06417493422826132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,4096,0.06877226829528808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,4096,0.06269546747207641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,3584,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,3584,0.056739199161529544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,3072,0.053979734579722084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,8192,0.09834880034128825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,3072,0.0508074680964152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,3072,0.04301760196685791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,2560,0.04477013349533081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,2560,0.04460800091425578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,7168,0.0859989325205485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,2048,0.03689066569010417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,2048,0.03803306818008423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,2048,0.03289600014686585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,1536,0.028735999266306562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,1536,0.033369600772857666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,1024,0.02100373307863871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,6144,0.07545279661814372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,1024,0.027484800418217974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,1024,0.022452267011006673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,768,0.01718613306681315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,768,0.025438932577768962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,512,0.013556266824404398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,512,0.02318613330523173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,512,0.01761173407236735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,256,0.009373866518338521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,256,0.020617600282033285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,256,0.01434879998366038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,128,0.007287466526031494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,128,0.018943999210993448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,64,0.006318933268388112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,64,0.019554134209950766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,6144,32,0.006567466755708058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,6144,32,0.019270400206247963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,65536,0.8458634694417319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,4096,0.05306986570358276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,3584,0.04806400140126546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,2560,0.03747626543045044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,16384,0.21352747281392417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,65536,0.7187946955362956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,16384,0.17955199877421063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,12288,0.16174826622009278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,1536,0.027382399638493853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,12288,0.14372587203979492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,10240,0.1359829266866048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,10240,0.11338773568471272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,768,0.020256000757217407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,8192,0.10964799722035726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,8192,0.093995730082194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,7168,0.09656960169474284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,7168,0.08461759885152181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,6144,128,0.013345066706339517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,6144,0.08171839714050293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,6144,0.07586346467336019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,5120,0.07006080150604248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,5120,0.0664629340171814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,12288,0.15372479756673177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,4096,0.05666666825612386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,4096,0.05643413464228312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,6144,0.07349973519643148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,3584,0.049713067213694256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,3584,0.051242665449778235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,16384,0.191103998819987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,3072,0.04431573152542114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,3072,0.04632426500320434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,10240,0.13760533332824706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,2560,0.036984535058339436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,2560,0.04007999897003174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,65536,0.7738229115804036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,2048,0.0350325345993042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,2048,0.03706560134887695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,7168,0.08693866729736328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,1536,0.024430932601292928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,1536,0.030989867448806763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,8192,0.09938666820526124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,1024,0.018153599898020425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,1024,0.026605866352717084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,5120,0.06391573349634806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,768,0.015152000387509666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,768,0.024209066232045492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,3072,0.04063466787338257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,512,0.012100266416867574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,512,0.021475199858347574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,4096,0.0525322675704956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,256,0.008636800448099773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,256,0.019705599546432494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,1536,0.024150399367014568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,128,0.00710399995247523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,128,0.018502400318781535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,2560,0.03646186590194702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,64,0.006342400113741558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,64,0.018756266434987387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,5120,32,0.0063967997829119366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,5120,32,0.018652800718943277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,3584,0.046855465571085615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,768,0.01660053332646688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,1024,0.02036799987157186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,65536,0.6800981521606445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,65536,0.5907978693644206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,16384,0.17810986836751302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,16384,0.15685332616170247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,12288,0.12661226590474445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,12288,0.11888000170389812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,2048,0.030269867181777953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,10240,0.10643413066864013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,10240,0.09874880313873291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,128,0.009924266735712688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,8192,0.08661226431528726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,256,0.011269332965215047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,8192,0.08289066950480142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,7168,0.07631786664326987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,16384,0.13404800097147623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,65536,0.5227264086405436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,6144,0.06561386585235596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,7168,0.0752895991007487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,6144,0.06663466691970825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,6144,0.05498239994049072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,5120,512,0.013771733641624451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,12288,0.10394453207651774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,5120,0.05584959983825684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,5120,0.05780479907989502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,4096,0.04604053497314453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,3584,0.039909334977467854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,4096,0.04923306703567505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,3584,0.043937067190806076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,3072,0.03500800132751465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,3072,0.039317333698272706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,3072,0.03215786616007487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,2560,0.030316799879074097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,2560,0.03504426479339599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,2048,0.024652800957361855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,10240,0.08648746808369955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,2048,0.03131519953409831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,1536,0.019663999478022255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,1536,0.02685439984003703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,1536,0.02062293291091919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,1024,0.014881066481272378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,1024,0.022798933585484824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,8192,0.070360533396403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,768,0.01250986655553182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,7168,0.06263146797815958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,768,0.021577600638071695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,768,0.015398400028546652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,512,0.01011306643486023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,512,0.020312533775965372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,256,0.007210666437943776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,5120,0.046945067246754964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,4096,0.039933868249257404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,256,0.0183242658774058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,3584,0.03588800032933553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,128,0.006169599791367849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,128,0.01768640081087748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,64,0.005380266904830932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,64,0.01741973360379537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,4096,32,0.0055178667108217875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,2560,0.028172800938288372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,4096,32,0.01734293301900228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,2048,0.024269866943359374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,16384,0.15909333229064943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,65536,0.6371093114217122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,65536,0.5786975860595703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,1024,0.016722132762273155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,16384,0.15199039777119955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,12288,0.12289280096689861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,16384,0.15545493761698406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,12288,0.1148576021194458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,10240,0.10306666692097981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,10240,0.10842239856719971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,8192,0.08323093255360922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,8192,0.09035840034484863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,7168,0.07449920177459717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,7168,0.07513279914855957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,512,0.01320319970448812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,6144,0.05990186532338461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,256,0.011435733238855997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,6144,0.06516160170237223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,4096,128,0.010430933038393656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,5120,0.05196266571680704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,5120,0.05689599911371866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,4096,0.04111466805140178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,4096,0.04445866743723552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,65536,0.5427530924479167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,3584,0.035853866736094156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,3584,0.0407477339108785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,12288,0.10401386419932049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,10240,0.08948693275451661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,3072,0.03051946759223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,8192,0.07245866457621256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,3072,0.03652799924214681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,2560,0.02699626684188843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,2560,0.03261866569519043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,2048,0.021651200453440347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,2048,0.029257599512736005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,7168,0.06456640164057413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,1536,0.017794134219487508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,1536,0.02544533411661784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,6144,0.054877865314483645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,5120,0.046659199396769206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,1024,0.012756266196568809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,1024,0.02146986722946167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,1024,0.015752533078193666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,4096,0.0392576018969218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,768,0.010420266787211101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,768,0.020426666736602782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,512,0.007934933404127757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,512,0.018935465812683107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,3584,0.03522773186365764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,256,0.0062282666563987735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,256,0.017779199282328288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,3072,0.030769066015879316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,128,0.005272533496220907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,2560,0.02771199941635132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,128,0.01735573410987854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,64,0.00491839994986852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,2048,0.02290239930152893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,64,0.017388800779978432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3584,32,0.005243733525276184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3584,32,0.017544533809026083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,65536,0.5135594685872396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,65536,0.5119263966878255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,16384,0.14203093846638998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,16384,0.13527359962463378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,1536,0.019142399231592812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,12288,0.09964799880981445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,12288,0.10275306701660156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,10240,0.08289173444112143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,10240,0.08720213572184245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,768,0.013319466511408487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,8192,0.06730026404062907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,512,0.011383466919263204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,8192,0.07428692976633708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,7168,0.05921813249588013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,7168,0.06647573312123617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,256,0.009614933530489604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,6144,0.051412268479665124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,6144,0.06020053227742513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3584,128,0.008513066172599792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,5120,0.04391680161158244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,5120,0.05165760119756063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,12288,0.09299413363138834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,4096,0.03513493140538533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,4096,0.04228800137837728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,16384,0.12237333456675212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,65536,0.4759968121846517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,3584,0.030908799171447753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,3584,0.03819733460744222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,3072,0.027446399132410686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,3072,0.035010135173797606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,6144,0.04881173372268677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,2560,0.023322665691375734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,10240,0.07842559814453125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,2560,0.031242666641871135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,2048,0.019502933820088705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,2048,0.02797866662343343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,7168,0.056244266033172605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,1536,0.015736533204714458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,1536,0.02492799957593282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,8192,0.06496533155441284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,1024,0.012054399649302164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,1024,0.021699200073877968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,5120,0.04400213162104289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,768,0.010295466581980387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,768,0.019808000326156615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,3072,0.028230400880177815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,512,0.007978666822115581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,512,0.018956800301869713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,4096,0.035201064745585126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,256,0.006186666587988535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,256,0.017803732554117838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,1536,0.017946666479110716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,128,0.005273599922657013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,128,0.016976000865300496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,2560,0.025212800502777098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,3584,0.032390399773915605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,64,0.004668800036112467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,3072,32,0.00487253318230311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,64,0.01705706715583801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,3072,32,0.01722453236579895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,65536,0.4317023913065593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,65536,0.4865237236022949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,768,0.012689066926638284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,16384,0.11164159774780273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,2048,0.021441066265106203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,1024,0.014498133460680643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,16384,0.1279754638671875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,12288,0.08524693648020426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,12288,0.09529600143432618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,10240,0.07196906407674154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,10240,0.08144746621449789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,8192,0.05755626757939657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,8192,0.06907626787821451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,256,0.009551999966303508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,7168,0.05090239842732748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,512,0.011441066861152649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,7168,0.06257386604944865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,6144,0.04424426555633545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,3072,128,0.008547199765841167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,6144,0.05619626839955648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,12288,0.08700586954752604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,5120,0.03832319974899292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,5120,0.047780267397562665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,4096,0.030800000826517744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,4096,0.039534934361775714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,16384,0.11436693668365479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,3584,0.027320533990859985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,65536,0.43586985270182294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,3584,0.03694080114364624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,3072,0.023970133066177367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,10240,0.07300480206807455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,3072,0.033479468027750654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,2560,0.02093440095583598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,2560,0.030508800347646074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,8192,0.05946666797002157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,2048,0.01764586567878723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,2048,0.027350399891535444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,6144,0.04524266719818115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,1536,0.014603733023007711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,1536,0.024495999018351235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,7168,0.05271253188451132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,1024,0.011065600315729777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,1024,0.021821866432825722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,3072,0.025880533456802367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,768,0.009453866879145305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,768,0.020270933707555137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,4096,0.032363732655843094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,512,0.007796266674995422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,512,0.01948480010032654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,5120,0.04012053410212199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,256,0.005941333373387655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,256,0.017527467012405394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,2560,0.023252266645431518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,3584,0.029815467198689778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,128,0.005092266698678335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,128,0.017076265811920167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,64,0.004586666822433472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,64,0.01722559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2560,32,0.004811733464399974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2560,32,0.016999467213948568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,65536,0.33945067723592126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,1536,0.016272000471750894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,65536,0.4358847935994466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,16384,0.08758186499277751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,16384,0.11516906420389812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,16384,0.08491093317667643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,12288,0.0659007986386617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,2048,0.019655466079711914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,12288,0.08752533594767252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,12288,0.06336533228556315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,10240,0.055384532610575354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,10240,0.07364053726196289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,10240,0.0550495982170105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,8192,0.04492373466491699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,8192,0.06330346663792928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,8192,0.044625067710876466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,7168,0.03975253502527873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,1024,0.013433600465456644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,7168,0.05546986659367879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,6144,0.03492053349812825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,6144,0.048342398802439374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,6144,0.03532906770706177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,5120,0.02895359992980957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,5120,0.0414741317431132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,4096,0.024126933018366496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,4096,0.03521813154220581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,65536,0.3150933265686035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,256,0.008705066641171773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,3584,0.021057067314783733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,128,0.007928533355395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,3584,0.03266026576360066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,512,0.010152533650398254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,3072,0.01873813271522522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,3072,0.03064746658007304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,2560,0.016294399897257485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2560,768,0.011692800124486287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,2560,0.027830400069554645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,2048,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,2048,0.02477759917577108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,2048,0.016250666975975037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,1536,0.01159999966621399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,1536,0.022098133961359658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,1024,0.00864533285299937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,1024,0.020014933745066323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,7168,0.04028160174687703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,768,0.007398400207360585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,5120,0.030142933130264282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,768,0.019129600127538046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,512,0.0062165334820747375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,512,0.018105600277582803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,4096,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,256,0.0050453335046768185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,256,0.01683626572291056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,3584,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,128,0.004502399762471517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,3072,0.020691200097401937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,128,0.016294399897257485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,2560,0.01840106646219889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,64,0.0041461333632469176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,64,0.016174933314323424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,2048,32,0.004293333490689596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,2048,32,0.0160970667997996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,1536,0.0134442667166392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,1024,0.011358933647473653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,65536,0.2677770614624023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,16384,0.06809066931406657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,65536,0.39592107137044275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,16384,0.10350933074951171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,12288,0.05655146837234497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,12288,0.08206933339436849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,768,0.010644267002741497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,10240,0.04323413372039795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,10240,0.06789226531982422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,512,0.009633066256841023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,8192,0.034815998872121175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,256,0.008239999910195668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,8192,0.056056535243988036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,7168,0.03349866469701131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,7168,0.04930773178736369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,2048,128,0.007732266684373219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,6144,0.026763733228047686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,6144,0.04250880082448323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,5120,0.022795732816060385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,5120,0.036637866497039796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,65536,0.29509121576944985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,4096,0.018729599316914876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,16384,0.07816853523254394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,4096,0.03185919920603435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,4096,0.023452800512313843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,3584,0.016937599579493205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,3584,0.0300437331199646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,3072,0.015084800124168397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,12288,0.05857813358306885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,3072,0.02696853280067444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,2560,0.013166933258374532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,2560,0.024844799439112344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,10240,0.05027626752853394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,2048,0.011160533626874287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,2048,0.02241493264834086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,8192,0.04219306707382202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,1536,0.009108266234397889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,1536,0.020182400941848755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,7168,0.03726186752319336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,1024,0.0071733335653940845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,1024,0.01949013272921244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,6144,0.03190293312072754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,768,0.006301866471767425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,768,0.01836479902267456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,5120,0.02794346610705058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,512,0.0053493330876032506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,512,0.01730453372001648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,3584,0.021658666928609214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,256,0.004632533093293508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,256,0.01641813317934672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,256,0.007388799885908763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,3072,0.019067732493082683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,128,0.004037333279848098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,128,0.015979733069737753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,2560,0.016582399606704712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,64,0.003697066754102707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1536,32,0.0038389332592487337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,64,0.015818666418393454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1536,32,0.016080000003178916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,2048,0.014274133245150247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,65536,0.1820906639099121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,1536,0.011982933680216471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,16384,0.051950931549072266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,65536,0.3473525365193685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,16384,0.09747200012207032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,12288,0.04643306732177734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,1024,0.010232533017794292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,12288,0.0773098627726237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,10240,0.042558932304382326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,10240,0.06565653483072917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,8192,0.029986133178075153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,8192,0.04962026675542196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,768,0.009348266323407491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,7168,0.026044799884160356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,7168,0.04469546476999919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,512,0.008467200398445129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,6144,0.025576533873875935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,6144,0.039308798313140866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1536,128,0.006635733445485433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,5120,0.02259413401285807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,5120,0.03438719908396403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,65536,0.25136213302612304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,16384,0.06686613559722901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,4096,0.017387733856836955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,4096,0.029764266808827718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,12288,0.05283840099970499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,3584,0.014707199732462563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,3584,0.028304000695546467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,10240,0.04671253363291423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,3072,0.014019200205802917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,3072,0.025945599873860675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,2560,0.012243200341860454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,2560,0.023846399784088135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,8192,0.03593813180923462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,7168,0.032308266560236616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,2048,0.0101173331340154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,2048,0.02156053384145101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,6144,0.028246400753657024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,1536,0.0076000000039736434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,1536,0.019925334056218467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,5120,0.024702932437260947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,1024,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,1024,0.018462934096654258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,768,0.005589333176612854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,768,0.017139200369517008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,4096,0.020627200603485107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,512,0.004785066843032837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,512,0.016732800006866454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,3072,0.017131733894348144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,256,0.00403413325548172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,256,0.016025599837303162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,3584,0.01894186735153198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,128,0.0036501333117485045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,2048,0.012748799721399941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,128,0.015289599696795145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,64,0.0034730667869249977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,64,0.015335466464360556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,1024,32,0.0035445332527160645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,2560,0.015010133385658264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,1024,32,0.015296000242233276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,65536,0.137936004002889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,1024,0.009294933080673218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,16384,0.04089386860529582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,65536,0.3283679962158203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,16384,0.09586986700693766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,12288,0.035613866647084554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,12288,0.073852801322937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,768,0.008709333340326945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,10240,0.03131626645723979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,10240,0.06175466775894165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,1536,0.011206400394439698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,8192,0.025616000096003216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,8192,0.04792213439941406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,256,0.007062399884064992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,7168,0.022781866788864135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,512,0.008004266520341237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,7168,0.04248746633529663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,6144,0.020054399967193604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,6144,0.03759146531422933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,1024,128,0.0066431999206542965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,5120,0.01717653274536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,5120,0.03291093309720357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,12288,0.04805013338724772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,4096,0.013723733027776084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,4096,0.028922667105992634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,65536,0.23967893918355307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,3584,0.013513599832852682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,16384,0.062990931669871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,3584,0.02680640021959941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,3072,0.01237333317597707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,3072,0.02486293315887451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,10240,0.041739734013875325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,2560,0.010272000233332317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,2560,0.022578134139378866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,8192,0.03381226857503255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,2048,0.008493866523106892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,2048,0.02097919980684916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,6144,0.026718932390213012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,1536,0.0068351998925209045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,1536,0.01919680039087931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,7168,0.030238932371139525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,1024,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,1024,0.017992534240086875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,5120,0.022776534159978233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,768,0.0048543999592463175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,768,0.0171509325504303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,3072,0.015634133418401083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,512,0.004282666742801667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,4096,0.019232000907262167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,512,0.016782933473587038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,256,0.0036703998843828833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,256,0.016396799683570863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,3584,0.017178666591644288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,128,0.0033471999069054925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,128,0.015013333161671957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,2560,0.013748266299565635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,64,0.003094399968783061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,64,0.015100799997647605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,768,32,0.003239466746648153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,1536,0.010122666756312054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,768,32,0.015065600474675497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,65536,0.09852373600006104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,65536,0.3056330680847168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,16384,0.032001066207885745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,16384,0.0910965363184611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,1024,0.00853760043780009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,12288,0.03447680075963338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,2048,0.011758933464686077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,12288,0.06875627040863037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,10240,0.030385067065556843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,10240,0.05687253475189209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,768,0.007880533238252004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,8192,0.020568533738454183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,8192,0.04371840159098307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,256,0.006214400132497152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,512,0.007163733243942261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,7168,0.017545600732167564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,7168,0.03900373379389445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,768,128,0.00580266664425532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,6144,0.01774079998334249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,6144,0.03515093326568604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,5120,0.015735466281572977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,5120,0.030595199267069502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,65536,0.2244352022806803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,4096,0.011533866326014202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,4096,0.026233599583307905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,16384,0.05732160011927286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,3584,0.009706667065620423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,3584,0.024553600947062174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,12288,0.0452128012975057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,3072,0.00874773363272349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,3072,0.023476266860961915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,10240,0.039819733301798506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,2560,0.00786133309205373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,2560,0.022443733612696328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,8192,0.032308266560236616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,2048,0.007053866485754649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,2048,0.020623999834060668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,7168,0.027859199047088622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,1536,0.005690666536490122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,1536,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,6144,0.02442773381868998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,1024,0.0047882666190465295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,1024,0.017658666769663493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,5120,0.020802134275436403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,768,0.004418133199214936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,768,0.016498133540153503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,4096,0.017857066790262856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,512,0.003977599988381068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,512,0.01623679995536804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,3072,0.014498133460680643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,256,0.0034527999659379324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,256,0.01565120021502177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,3584,0.01641706625620524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,128,0.0032106667757034303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,128,0.015080533425013223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,2560,0.01316266655921936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,64,0.0031669333577156065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,64,0.01523413360118866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,512,32,0.0032831999162832894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,512,32,0.015129599968592325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,65536,0.06871146361033122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,2048,0.011517866452534994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,65536,0.28935254414876305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,1024,0.008075733482837678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,16384,0.02300800085067749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,16384,0.08434986273447673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,12288,0.02007466753323873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,1536,0.009757866462071735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,12288,0.06511253515879313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,10240,0.017591466506322227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,10240,0.05222506523132324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,768,0.007655466596285502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,8192,0.01290986637274424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,8192,0.04047573407491048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,512,0.007101866602897644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,7168,0.012823466459910074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,7168,0.0350655992825826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,256,0.006299733122189839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,6144,0.013779200116793313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,512,128,0.005816533168156942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,6144,0.03351893424987793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,5120,0.012364799777666729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,5120,0.03015786608060201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,65536,0.20952000617980956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,16384,0.05500799814860026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,4096,0.009755733609199523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,4096,0.025948800643285114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,3584,0.008368000388145447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,3584,0.024577067295710246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,12288,0.04293973445892334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,3072,0.007788800199826558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,3072,0.023324799537658692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,10240,0.03669439951578776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,2560,0.007494399944941203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,2560,0.02228053410847982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,8192,0.02909653385480245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,2048,0.006687999765078227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,2048,0.020147200425465903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,7168,0.026317866643269856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,1536,0.005760000149408976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,1536,0.01880319913228353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,6144,0.0228000005086263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,1024,0.004747733473777771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,1024,0.017359999815622966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,5120,0.01991999944051107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,768,0.004249600072701773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,768,0.01686613361040751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,4096,0.017322667439778647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,512,0.0038399999340375268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,512,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,3072,0.013868799805641175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,256,0.003639466563860575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,256,0.01567146678765615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,3584,0.01574613352616628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,2560,0.012588799993197123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,128,0.003066666672627131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,128,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,2048,0.010824533303578694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,64,0.002917333443959554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,64,0.014828800161679586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,256,32,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,256,32,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,65536,0.058849068482716885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,1536,0.009511466821034749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,65536,0.28287251790364587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,16384,0.014605866869290671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,768,0.0072970668474833175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,16384,0.07998826503753662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,1024,0.007838933169841767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,12288,0.010847999652226766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,12288,0.05649706522623697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,10240,0.009947733084360758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,10240,0.04656320015589396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,8192,0.00819413314263026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,512,0.006844800213972728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,8192,0.03687573273976644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,7168,0.011496532956759136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,7168,0.03310933311780294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,256,0.005967999994754791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,6144,0.008162133395671844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,6144,0.03163733283678691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,256,128,0.005582933127880096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,5120,0.008553600311279297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,65536,0.20497280756632485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,5120,0.02818560004234314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,16384,0.05508480072021484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,4096,0.007753600180149078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,4096,0.025009065866470337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,12288,0.04254080057144165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,3584,0.008347733815511068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,3584,0.023913600047429404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,3072,0.008088533580303193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,3072,0.02215999960899353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,10240,0.03595306475957234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,2560,0.006380799909432728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,2560,0.021373866001764934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,2560,0.012341333429018657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,2048,0.005669333537419637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,2048,0.019359999895095827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,8192,0.029269333680470782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,1536,0.004997333387533823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,7168,0.02603413263956706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,1536,0.018573866287867228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,1024,0.004257066547870636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,1024,0.017078399658203125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,6144,0.02271040081977844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,768,0.003912533322970072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,768,0.01718826691309611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,5120,0.01985599994659424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,512,0.0035466666022936503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,512,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,4096,0.016839466492335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,256,0.003218133250872294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,256,0.015590399503707886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,3584,0.01562879979610443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,128,0.0029290666182835894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,3072,0.0137855996688207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,128,0.014847999811172486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,64,0.0028160000840822858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,64,0.014775466918945313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,128,32,0.0029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,128,32,0.015031466881434122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,2048,0.01074773371219635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,65536,0.053762133916219076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,16384,0.01088213324546814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,65536,0.28015467325846355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,16384,0.07543360392252604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,12288,0.009371733665466309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,1536,0.009654399752616883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,10240,0.008025600016117096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,12288,0.05610773166020712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,8192,0.0070816000302632645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,10240,0.04457279841105143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,8192,0.035529601573944095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,7168,0.006877866884072621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,7168,0.031623466809590654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,1024,0.007788800199826558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,6144,0.00830080012480418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,5120,0.0077237332860628765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,6144,0.02908586661020915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,4096,0.007032533486684163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,5120,0.028087466955184937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,4096,0.026107732454935712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,3584,0.006715733309586842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,3584,0.02365866700808207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,3072,0.006374399860699971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,3072,0.022516266504923502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,2560,0.0062047998110453285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,2560,0.02068159977595011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,2048,0.0056320001681645715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,2048,0.02015786568323771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,1536,0.004861866434415182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,768,0.007464533547560374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,1536,0.018594133853912353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,1024,0.004242133100827535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,1024,0.01765759984652201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,768,0.00384853333234787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,512,0.00346666673819224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,768,0.016499200463294984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,512,0.006773333251476288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,256,0.003123199939727783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,512,0.016123732924461363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,256,0.015428266922632852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,128,0.002976000060637792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,128,0.014949333667755128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,64,0.002916266769170761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,256,0.005955199897289276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,64,0.015014400084813436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,65536,0.052085332075754796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,64,32,0.0029834667841593427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,64,32,0.014577066898345948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,16384,0.009865599870681762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,12288,0.008748799562454224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1024,128,128,0.0054848000407218935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,16384,0.07536853154500325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,10240,0.008071466783682505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,65536,0.278387196858724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,12288,0.053470933437347413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,8192,0.007022933165232341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,10240,0.042957866191864015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,8192,0.03454613288243612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,7168,0.006868266562620799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,7168,0.03209493358929952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,6144,0.006201600035031637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,6144,0.029523199796676634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,5120,0.0065290664633115125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,4096,0.0060703997810681665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,3072,0.006182399888833364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,5120,0.029098665714263915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,4096,0.024779733022054037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,3584,0.006428800026575724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,3584,0.023539199431737264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,3072,0.02226346731185913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,2560,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,2048,0.005422933399677277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,2560,0.021252266565958657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,768,0.0037429332733154297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,2048,0.019368533293406168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,1536,0.0047199999292691554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,1536,0.01813653310139974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,1024,0.004045866678158442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,1024,0.017147733767827352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,512,0.0034495999415715536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,768,0.01670080025990804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,512,0.016101333498954772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,256,0.0031189332405726117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,128,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,256,0.015090133746465048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,64,0.002776533365249634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,128,0.014594133694966635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,64,0.014567466576894126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1024,32,32,0.0027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1024,32,32,0.01458560029665629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,12288,0.8710378646850586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,16384,1.1715861002604167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,10240,1.4279956817626953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,12288,1.6858623504638672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,10240,0.729144541422526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,16384,2.194151560465495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,8192,0.6281728108723958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,8192,1.0973567962646484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,7168,0.532315731048584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,7168,0.9306186676025391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,6144,0.9409119923909506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,6144,0.46600106557210286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,5120,0.3901866594950358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,5120,0.7356170654296875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,4096,0.5720799763997395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,4096,0.32522239685058596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,12288,1.2475733439127603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,10240,1.0751722971598308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,3584,0.28964265187581384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,3584,0.4835338592529297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,3072,0.4175733248392741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,16384,1.8980064392089844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,3072,0.2627701282501221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,2560,0.36534401575724285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,7168,0.7283157348632813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,2560,0.2150485356648763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,5120,0.5284288088480632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,2048,0.28269119262695314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,8192,0.8188789367675782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,2048,0.18268052736918133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,1536,0.21222292582194008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,1536,0.14637759526570637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,1024,0.15029120445251465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,6144,0.6255850474039714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,1024,0.1060640017191569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,768,0.11786346435546875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,768,0.0899178663889567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,4096,0.4190325419108073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,512,0.09280426502227783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,512,0.07526506582895914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,2560,0.269924259185791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,256,0.06236053307851156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,3584,0.3726112047831217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,256,0.06316159963607788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,128,0.050271999835968015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,128,0.05792640050252279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,2048,0.20672213236490883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,64,0.0450272003809611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,64,0.05971839825312296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,65536,32,0.04523200194040934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,65536,32,0.05911786556243896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,768,0.11681919892628986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,3072,0.3167797406514486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,1536,0.16711999575297037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,16384,0.5074154535929363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,16384,0.32073173522949217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,1024,0.12850346565246581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,65536,1.2869258880615235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,12288,0.38330879211425783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,12288,0.26464959780375164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,65536,2.284407552083333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,10240,0.3260160128275553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,10240,0.2231669267018636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,8192,0.2620192050933838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,8192,0.1689685344696045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,256,0.07953493595123291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,128,0.07417279879252116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,7168,0.22621332804361977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,7168,0.14851733843485515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,6144,0.19617387453715007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,65536,512,0.09495786825815836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,6144,0.13149333000183105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,5120,0.1650272051493327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,5120,0.11308586597442627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,10240,0.2751178741455078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,12288,0.3166709264119466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,4096,0.1348917325337728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,4096,0.09715306758880615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,3584,0.11800320148468017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,16384,0.4086943944295247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,65536,1.8530506134033202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,3584,0.08722773392995199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,3072,0.07772479852040609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,3072,0.11903893152872722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,2560,0.10129706859588623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,2560,0.07103359699249268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,7168,0.17959146499633788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,2048,0.07105706532796224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,5120,0.13102293014526367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,2048,0.058798933029174806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,1536,0.05562986532847086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,1536,0.04998720089594523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,8192,0.2027029355367025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,1024,0.041517865657806394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,1024,0.041124268372853594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,6144,0.15435627301534016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,768,0.03335786660512288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,768,0.03733439842859904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,2560,0.0705578645070394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,512,0.026158932844797773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,512,0.032689066727956136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,4096,0.10555413564046223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,256,0.01844266653060913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,256,0.028013867139816285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,3584,0.09189653396606445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,128,0.01409280002117157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,128,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,3072,0.08006827036539713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,64,0.012227200468381246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,64,0.02707200050354004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,2048,0.056600534915924074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,16384,32,0.012942933042844138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,16384,32,0.02622186740239461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,1536,0.04756160179773967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,65536,0.9620277404785156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,768,0.03407253424326579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,65536,1.6649162292480468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,1024,0.038306132952372236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,16384,0.38613545099894203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,16384,0.24872105916341147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,12288,0.2871583938598633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,12288,0.20018240610758462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,10240,0.2406378746032715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,10240,0.16169919967651367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,8192,0.19255785942077636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,8192,0.13225920200347902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,256,0.024197334051132204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,7168,0.17000212669372558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,7168,0.11693759759267171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,128,0.022060799598693847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,6144,0.14489280382792155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,16384,512,0.028347732623418172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,6144,0.1021888017654419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,5120,0.12175680001576741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,5120,0.08834880193074544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,12288,0.22935892740885416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,4096,0.10044053395589192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,4096,0.07568533420562744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,16384,0.30299199422200523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,65536,1.2824843088785807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,3584,0.08730666637420655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,3584,0.06890559991200765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,10240,0.19692479769388835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,3072,0.0763317346572876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,3072,0.06260799964269002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,6144,0.11910933653513592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,2560,0.06562133232752482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,2560,0.05523626804351807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,7168,0.13710187276204427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,2048,0.05298453172047933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,2048,0.047220265865325926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,8192,0.15682026545206706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,1536,0.04127360184987386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,1536,0.040232535203297934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,1024,0.029908267656962077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,1024,0.033045333623886106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,5120,0.09952426751454671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,768,0.02376213272412618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,768,0.029531733194986982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,4096,0.08195093472798666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,512,0.018654932578404747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,512,0.026510934034983318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,3072,0.061351466178894046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,256,0.01304533382256826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,256,0.02313813368479411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,1536,0.035369598865509035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,128,0.010012800494829815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,128,0.021683200200398763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,2560,0.05589013497034708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,3584,0.07121919790903727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,64,0.00844693382581075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,64,0.021342933177947998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,12288,32,0.008801066875457763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,12288,32,0.021918932596842446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,768,0.02341759999593099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,2048,0.0451530655225118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,65536,1.420473607381185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,65536,0.8564767837524414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,16384,0.32940905888875327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,16384,0.2544586658477783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,12288,0.2519872029622396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,12288,0.17358187039693196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,10240,0.21136852900187172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,10240,0.14904853502909343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,1024,0.031022934118906657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,8192,0.1646527926127116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,8192,0.1166431983311971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,256,0.016364799936612447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,7168,0.14682666460673016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,512,0.01973653237024943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,7168,0.10533013343811035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,6144,0.1269802649815877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,12288,128,0.014138666788736978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,6144,0.09353280067443848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,5120,0.10676586627960205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,5120,0.08184853394826254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,12288,0.1962922732035319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,4096,0.08626560370127359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,4096,0.06878720124562582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,16384,0.25839145978291833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,3584,0.07631146907806396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,65536,1.0624117533365884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,3584,0.06289173364639282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,3072,0.06598079999287923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,3072,0.05648320118586222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,6144,0.1014474630355835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,10240,0.17308053970336915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,2560,0.056085332234700525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,2560,0.050670933723449704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,7168,0.12136747042338054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,2048,0.0470794677734375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,2048,0.04302719831466675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,1536,0.03673706849416097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,1536,0.03747946818669637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,8192,0.1348522663116455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,1024,0.026060799757639568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,1024,0.03060693343480428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,5120,0.08752000331878662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,768,0.02185813387235006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,768,0.02797546585400899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,4096,0.07040106455485026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,3072,0.05335359970728556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,512,0.016426666577657064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,512,0.025250132878621417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,256,0.011971199512481689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,256,0.025730133056640625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,3584,0.06389653285344442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,128,0.010637866457303365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,128,0.022102399667104086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,1536,0.0321450670560201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,64,0.009715200463930766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,64,0.022911999622980753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,10240,32,0.010160000125567118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,10240,32,0.02285439968109131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,2560,0.047755734125773115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,65536,1.0830708821614583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,65536,0.7334933598836263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,2048,0.0412501335144043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,16384,0.2622901280721029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,16384,0.1863584041595459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,256,0.01564586659272512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,12288,0.19663039843241376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,12288,0.14081279436747235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,1024,0.027084799607594807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,512,0.018484266599019368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,768,0.022082134087880453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,10240,0.16420159339904786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,10240,0.12029333114624023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,8192,0.13221440315246583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,8192,0.09934186935424805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,7168,0.11763947010040283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,10240,128,0.013393066326777139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,7168,0.08951893647511801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,6144,0.10156479676564534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,6144,0.07975146770477295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,5120,0.08653120199839273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,5120,0.07065386772155761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,65536,0.8637130737304688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,10240,0.141265074412028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,4096,0.08606506983439127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,4096,0.05909013350804647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,3584,0.06137386560440063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,3584,0.05356160004933676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,12288,0.1627413272857666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,16384,0.20866880416870118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,3072,0.05266666809717814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,3072,0.04778560002644856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,2560,0.044964265823364255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,2560,0.04252800146738688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,7168,0.09216319719950358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,2048,0.03661866585413615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,2048,0.03685119946797689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,8192,0.10585707028706867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,5120,0.06717120011647543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,1536,0.0284223993619283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,1536,0.03211733301480611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,6144,0.08155413468678793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,1024,0.020844799280166627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,1024,0.02758293350537618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,768,0.017130666971206666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,768,0.02555733323097229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,2560,0.03801066478093465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,512,0.01332586705684662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,512,0.022966400782267252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,4096,0.0550816019376119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,256,0.009410132964452107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,256,0.02026559909184774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,2048,0.03186986645062764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,128,0.007479466497898102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,3584,0.04911786715189616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,128,0.01893333395322164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,64,0.006497066716353099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,64,0.019117865959803262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,8192,32,0.006706133484840393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,8192,32,0.01927679975827535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,3072,0.04325439929962158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,1536,0.02693866689999898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,768,0.019313067197799683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,65536,0.9001205444335938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,16384,0.22990825970967613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,16384,0.18746879895528157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,65536,0.6519573211669922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,12288,0.17222399711608888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,12288,0.13975253105163574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,10240,0.144160000483195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,1024,0.02170133392016093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,10240,0.11247146924336751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,8192,0.11682346661885579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,8192,0.09210453033447266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,128,0.01285653313000997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,256,0.013921067118644714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,7168,0.10332053502400715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,7168,0.08261760075887045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,6144,0.08982613086700439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,6144,0.07329599857330323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,8192,512,0.016389333208402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,5120,0.0747968037923177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,5120,0.06347306569417319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,12288,0.14005333582560223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,4096,0.0603061318397522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,4096,0.053783468405405675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,16384,0.18658773104349774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,3584,0.053660798072814944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,65536,0.7480618794759114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,3584,0.048799999554951984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,3072,0.04637226661046346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,3072,0.04400746822357178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,10240,0.12174186706542969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,2560,0.04011840025583903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,2560,0.03917653163274129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,8192,0.09776960213979086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,2048,0.03207146724065145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,6144,0.07317333221435547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,2048,0.034034132957458496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,1536,0.02513493299484253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,1536,0.029549866914749146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,7168,0.08546986579895019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,1024,0.018438400824864705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,1024,0.0249834676583608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,5120,0.06245119969050089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,3072,0.03906559944152832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,768,0.015086932977040609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,768,0.023011199633280435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,4096,0.05083306630452474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,512,0.01158186693986257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,512,0.022191999355951945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,256,0.008916266759236654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,1536,0.02342080076535543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,2560,0.03603626489639282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,256,0.02039146622021993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,128,0.007712000111738841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,128,0.019056000312169395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,64,0.007428266604741414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,64,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,7168,32,0.007666133344173431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,3584,0.04533333381017049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,7168,32,0.019206400712331137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,65536,0.574722162882487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,65536,0.7389919916788737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,16384,0.1892031987508138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,16384,0.15448106129964193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,1024,0.020108799139658608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,2048,0.029345067342122395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,12288,0.14295786221822102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,12288,0.11516799926757812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,10240,0.12047146956125895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,10240,0.09869866371154785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,128,0.009751466910044353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,256,0.01102186640103658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,8192,0.0982805331548055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,512,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,8192,0.08171947002410888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,7168,0.08455253442128499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,7168,0.07373332977294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,6144,0.07316266695658366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,6144,0.06584959824879964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,7168,768,0.016261333227157594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,5120,0.06212480068206787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,5120,0.05804053147633871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,12288,0.13757333755493165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,4096,0.05057386557261149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,4096,0.049615999062856034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,16384,0.17256107330322265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,65536,0.7527104059855143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,3584,0.04439786672592163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,3584,0.05244693358739218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,3072,0.04493546485900879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,3072,0.045849601427714035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,10240,0.11747946739196777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,2560,0.03342399994532268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,2560,0.036135466893514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,6144,0.06626346508661905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,2048,0.02754666606585185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,2048,0.031999999284744264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,7168,0.07345600128173828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,1536,0.022024534145991006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,1536,0.028724267085393267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,8192,0.08716479937235513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,1024,0.0164874662955602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,1024,0.024545067548751832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,5120,0.05469013452529907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,768,0.013809067010879517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,768,0.023105067014694215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,4096,0.046811731656392415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,3072,0.03544960021972656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,512,0.010893866419792175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,512,0.020718934138615926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,256,0.007807999849319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,256,0.01918720006942749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,2560,0.032013867298762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,128,0.006638933221499126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,128,0.018154666821161906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,3584,0.040345601240793866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,64,0.005852800110975901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,64,0.01841493248939514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,6144,32,0.006051200131575266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,6144,32,0.018318933248519898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,1536,0.02167466680208842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,65536,0.6478709538777669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,65536,0.521230920155843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,16384,0.1637781302134196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,16384,0.1366410732269287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,2048,0.026700800657272337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,12288,0.12396799723307292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,12288,0.10416959921518962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,768,0.015424000223477683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,10240,0.10460373560587566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,1024,0.0184608002503713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,10240,0.08934079806009929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,8192,0.08424106438954672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,8192,0.07493226528167725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,256,0.010867200295130412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,7168,0.0744330644607544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,7168,0.06880106925964355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,512,0.012781866391499839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,6144,0.06439786752065023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,6144,128,0.00993173321088155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,6144,0.06147520144780477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,5120,0.05435946782430014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,5120,0.05364053249359131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,12288,0.10617492993672688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,4096,0.04415466785430908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,4096,0.04551999966303508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,65536,0.5684576034545898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,16384,0.13942186037699383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,3584,0.040906667709350586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,3584,0.0413269321123759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,3072,0.0344320019086202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,3072,0.03728533188501994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,10240,0.0965930700302124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,2560,0.029845333099365233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,6144,0.055335466066996256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,2560,0.03353386720021566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,2048,0.024699733654658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,2048,0.03004586696624756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,8192,0.0739573319753011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,1536,0.01953279972076416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,1536,0.02640639940897624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,7168,0.06770666440327963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,1024,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,1024,0.023835732539494833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,5120,0.04795519908269246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,3072,0.03112000028292338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,768,0.012601600090662638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,4096,0.04018666744232178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,768,0.02228906750679016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,512,0.00990613301595052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,512,0.02023893396059672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,256,0.007375999788443248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,256,0.01843093236287435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,1536,0.01941759983698527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,128,0.00602346658706665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,128,0.017548799514770508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,2560,0.028253867228825884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,64,0.0054400001962979635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,64,0.01748159925142924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,5120,32,0.005559466779232025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,3584,0.035943468411763504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,5120,32,0.01757226586341858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,65536,0.5125247955322265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,65536,0.4555232048034668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,16384,0.13231253623962402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,16384,0.11865706443786621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,2048,0.02409600019454956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,12288,0.10138879617055256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,12288,0.09153280258178711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,1024,0.01634666621685028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,10240,0.082968537012736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,10240,0.07804693380991617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,256,0.010106666882832845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,8192,0.06675733725229899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,8192,0.06709866523742676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,512,0.011455999811490376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,128,0.008973866701126099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,7168,0.05881173213322958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,7168,0.060096001625061034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,6144,0.051252265771230064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,6144,0.053515732288360596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,5120,768,0.014065066973368326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,5120,0.043332266807556155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,5120,0.0468938668568929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,12288,0.09868799845377604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,4096,0.03593173424402873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,4096,0.03964373270670573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,16384,0.131549866994222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,65536,0.5521770477294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,3584,0.03155946731567383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,3584,0.036505599816640213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,3072,0.0273087998231252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,3072,0.03370453516642253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,10240,0.09036906560262045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,2560,0.02320746580759684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,2560,0.02946773370107015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,6144,0.05117119948069254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,2048,0.019418666760126747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,2048,0.026898133754730224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,7168,0.05469013452529907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,1536,0.015736533204714458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,1536,0.024114133914311726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,8192,0.05990293423334757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,1024,0.011987200379371643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,1024,0.021386667092641195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,5120,0.04099520047505696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,768,0.010073600212732951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,768,0.020104533433914183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,3072,0.0269322673479716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,512,0.007930666704972585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,512,0.019065600633621217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,4096,0.03454826672871907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,256,0.0061247999469439185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,256,0.017361066738764443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,3584,0.029972267150878907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,128,0.005269333223501841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,128,0.016849066813786825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,2560,0.024125866095225015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,64,0.0047872001926104225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,64,0.016923733552296958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,4096,32,0.004994133114814758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,4096,32,0.017027199268341064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,1536,0.016755199432373045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,65536,0.4478624025980632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,65536,0.41880639394124347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,2048,0.02028586665789286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,16384,0.11492479642232259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,1024,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,16384,0.11023253599802654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,12288,0.08726826508839926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,12288,0.08496320247650146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,768,0.011750400066375732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,10240,0.07340266704559326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,10240,0.0740991989771525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,256,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,8192,0.06023573478062948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,8192,0.06320319970448812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,512,0.010059733192125957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,7168,0.05187840064366659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,7168,0.057643731435139976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,6144,0.044915199279785156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,6144,0.05151679913202921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,4096,128,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,5120,0.03861120144526164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,5120,0.044766934712727864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,12288,0.09578986962636313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,4096,0.03159466584523519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,4096,0.03762666781743367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,16384,0.12477013270060222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,3584,0.02803093393643697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,65536,0.48815574645996096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,3584,0.03461120128631592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,3072,0.024488532543182374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,3072,0.03253333369890849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,10240,0.08601813316345215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,6144,0.045109331607818604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,2560,0.02131519913673401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,2560,0.02895680069923401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,2048,0.01826453407605489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,2048,0.026726400852203368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,7168,0.0517962654431661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,1536,0.014856533209482829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,1536,0.0243562658627828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,8192,0.05993920167287191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,1024,0.01163093348344167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,1024,0.020849066972732543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,5120,0.03919893503189087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,768,0.010006399949391682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,768,0.02016213337580363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,3072,0.025845332940419512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,512,0.007906133433183034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,512,0.019272534052530925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,4096,0.03252480030059814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,256,0.006621866424878438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,256,0.01760960022608439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,1536,0.01658453345298767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,128,0.005595733225345611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,128,0.01699626644452413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,2560,0.023187200228373207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,64,0.005096533397833506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,64,0.017670400937398276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,3584,0.02906773289044698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3584,32,0.005386666456858317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3584,32,0.017442133029301962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,65536,0.3889514605204264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,65536,0.3991765340169271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,2048,0.020036266247431437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,16384,0.09886720180511474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,16384,0.10241493384043376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,1024,0.01358080009619395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,12288,0.07460052967071533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,12288,0.08055253028869629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,768,0.011592533191045125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,10240,0.0645792007446289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,10240,0.07011306285858154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,8192,0.05149120092391968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,8192,0.05930879910786947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,256,0.008644266923268636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,7168,0.04509546756744385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,7168,0.053010133902231846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,512,0.010077866911888122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,6144,0.03922986586888631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3584,128,0.008025600016117096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,6144,0.047622398535410566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,5120,0.033896533648173015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,5120,0.04133439858754476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,12288,0.09258026281992594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,4096,0.026850134134292603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,4096,0.03564373254776001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,65536,0.4923904101053874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,16384,0.11907839775085449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,3584,0.025000532468159992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,3584,0.03220799962679545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,3072,0.022645332415898643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,10240,0.07788586616516113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,3072,0.030455466111501055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,2560,0.018387200435002644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,2560,0.027552000681559247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,8192,0.05529280106226603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,2048,0.015625600020090738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,2048,0.024663466215133666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,6144,0.04386133352915446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,1536,0.012685867150624594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,7168,0.05055466492970785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,1536,0.022338134050369263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,5120,0.03750293254852295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,1024,0.009913600484530131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,1024,0.02090559999148051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,768,0.008164266745249431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,768,0.0191103994846344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,3072,0.0233514666557312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,512,0.0067456002036730455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,4096,0.030264532566070555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,512,0.018318933248519898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,256,0.005498666564623515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,256,0.01694933374722799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,3584,0.026821333169937133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,2560,0.021600000063578286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,128,0.004729599754015604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,128,0.016404267152150473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,64,0.004273066421349844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,64,0.016502400239308677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,3072,32,0.00453653335571289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,3072,32,0.016432000199953715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,1536,0.015101866920789084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,65536,0.3371744155883789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,2048,0.018246400356292724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,65536,0.36013654073079426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,16384,0.08650453090667724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,16384,0.0951200008392334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,12288,0.06533759832382202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,12288,0.07392213344573975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,768,0.010759466886520385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,10240,0.05439573526382446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,1024,0.012744533022244773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,10240,0.06529386838277182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,8192,0.04421653350194295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,8192,0.05545920133590698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,256,0.008330666522185007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,7168,0.03996266523996989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,7168,0.05045973459879557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,512,0.009337600072224934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,3072,128,0.007630933324495952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,6144,0.03376746575037638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,6144,0.0437450647354126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,5120,0.029148799180984498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,5120,0.03754239877065023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,12288,0.0738207976023356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,4096,0.023808000485102336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,4096,0.03293866713841756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,16384,0.08291733264923096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,65536,0.3143967946370443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,3584,0.02143893241882324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,3584,0.02983466585477193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,3072,0.01848640044530233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,3072,0.027825067440668743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,6144,0.03718719879786174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,2560,0.01627306640148163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,2560,0.026346667607625322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,10240,0.06212160189946493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,2048,0.013853866855303446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,2048,0.024228266874949136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,7168,0.0422815998395284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,1536,0.01160533328851064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,1536,0.02211306691169739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,8192,0.04599039951960246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,1024,0.008746666709582011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,1024,0.019554134209950766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,5120,0.03158826629320781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,768,0.007522133489449819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,768,0.018540799617767334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,3072,0.02100906570752462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,4096,0.028032000859578448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,512,0.006265600025653839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,512,0.017480534315109254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,256,0.004972800115744273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,256,0.0165802667538325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,1536,0.012698666254679362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,3584,0.023526400327682495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,128,0.004466133316357931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,128,0.016114133596420287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,2560,0.01866453289985657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,64,0.004287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2560,32,0.004327466587225596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,64,0.016105600198109946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2560,32,0.016272000471750894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,65536,0.2614677270253499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,65536,0.33538878758748375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,16384,0.09229546387990316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,16384,0.0983253320058187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,2048,0.015758933623631795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,12288,0.0738922675450643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,768,0.009386666615804036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,12288,0.07841813564300537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,10240,0.06062933206558228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,10240,0.06825599670410157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,1024,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,8192,0.046380798021952316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,8192,0.05077653328577677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,256,0.00737066666285197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,7168,0.04262293179829915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,7168,0.045875199635823566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,512,0.008109866579373678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2560,128,0.006646400193373363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,6144,0.036673065026601157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,6144,0.041196799278259276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,5120,0.031042132774988813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,5120,0.03504853248596192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,12288,0.06647786696751913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,65536,0.33629013697306315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,4096,0.02031360069910685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,4096,0.031004800399144487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,16384,0.08976533412933349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,3584,0.018786134322484334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,3584,0.028178133567174274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,3072,0.01663146714369456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,3072,0.026510934034983318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,10240,0.05682346820831299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,2560,0.014165332913398743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,2560,0.024654932816823325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,8192,0.044300798575083414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,2048,0.012644267082214356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,2048,0.021265067656834922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,6144,0.03544960021972656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,7168,0.039426132043202715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,1536,0.010158933202425639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,1536,0.020594133933385213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,1024,0.007485866546630859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,1024,0.018394666910171508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,3072,0.019350399573644005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,768,0.006385066608587901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,768,0.018088533480962118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,4096,0.025600000222524004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,512,0.0054517333706219995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,5120,0.029796266555786134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,512,0.0171615997950236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,256,0.004589866598447164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,256,0.016307199994723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,3584,0.021065600713094077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,128,0.004014933357636133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,128,0.0160863995552063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,2560,0.017121066649754844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,1536,0.011632000406583149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,64,0.0037237333754698435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,2048,32,0.0039018665750821433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,64,0.015941333770751954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,2048,32,0.01581546664237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,65536,0.20445653597513833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,65536,0.30139519373575846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,16384,0.05793279806772868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,16384,0.08126293023427328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,1024,0.009769599636395771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,2048,0.014237866799036662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,12288,0.042393600940704344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,12288,0.06410666704177856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,10240,0.03548053503036499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,10240,0.05580480098724365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,256,0.006728533407052357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,128,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,8192,0.02939093311627706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,8192,0.044759468237559004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,768,0.008438400427500407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,7168,0.02550506591796875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,7168,0.04018986622492472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,2048,512,0.00735040009021759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,6144,0.022399999698003135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,6144,0.03548053503036499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,5120,0.01904746691385905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,5120,0.031729066371917726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,65536,0.30047359466552737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,4096,0.01625920037428538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,16384,0.07886826992034912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,4096,0.028625067075093585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,12288,0.05744959910710653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,3584,0.01470080018043518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,3584,0.025778132677078246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,3072,0.012798933188120523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,3072,0.02411093314488729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,10240,0.04750933249791463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,2560,0.01142080028851827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,2560,0.02212693293889364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,6144,0.02952959934870402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,2048,0.012466133634249369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,2048,0.020921599864959717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,7168,0.032893866300582886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,1536,0.010106666882832845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,1536,0.02129279971122742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,8192,0.03919359842936198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,1024,0.00747626672188441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,1024,0.018590933084487914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,5120,0.025512532393137617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,768,0.0064085334539413456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,768,0.017526400089263917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,3072,0.015960533420244852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,512,0.005205333232879639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,512,0.01714986761411031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,4096,0.019988266626993816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,256,0.004172799984614054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,256,0.01579093337059021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,3584,0.01785599986712138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,128,0.0037621334195137024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,2560,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,128,0.01577279965082804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,1536,0.010580266515413921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,64,0.003571200122435888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,64,0.015655466914176942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1536,32,0.003689600030581156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1536,32,0.01567893326282501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,65536,0.13846933046976725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,2048,0.012247467041015625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,65536,0.26712214152018227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,16384,0.04724160035451253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,1024,0.009013332923253377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,16384,0.08719253540039062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,768,0.008096000055472057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,12288,0.0503541350364685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,12288,0.06661760012308757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,10240,0.04294506708780925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,10240,0.052065066496531164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,8192,0.03487146695454915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,256,0.006375466783841451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,8192,0.04063040018081665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,7168,0.031004800399144487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,7168,0.03691200017929077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,512,0.007001600166161854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1536,128,0.005883733431498209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,6144,0.025921066602071125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,6144,0.03338026603062948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,5120,0.018351999918619792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,5120,0.02959253390630086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,12288,0.0476149320602417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,4096,0.014260266224543253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,4096,0.025596799453099568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,65536,0.26221332550048826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,16384,0.06331733465194703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,3584,0.011885866522789001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,3584,0.024649600187937416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,3072,0.012216533223787945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,3072,0.022801067431767783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,10240,0.04370986620585124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,2560,0.010526933272679647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,2560,0.021358933051427206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,6144,0.024296534061431885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,2048,0.008357333143552144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,2048,0.019800533850987755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,8192,0.0300053338209788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,7168,0.027502934137980144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,1536,0.0071733335653940845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,1536,0.019132800896962485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,1024,0.005619200070699056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,1024,0.017729065815607705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,5120,0.021689599752426146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,768,0.004910933474699656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,768,0.016770132382710776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,3072,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,512,0.00436160018046697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,512,0.016354133685429893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,4096,0.01773866613705953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,256,0.0037823999921480812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,256,0.015527466932932535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,1536,0.00912000040213267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,128,0.0034314667185147605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,3584,0.015703466534614564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,128,0.015223466356595359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,64,0.0032810665667057036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,2560,0.012381866574287415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,64,0.015245866775512696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,1024,32,0.003409066547950109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,1024,32,0.01529706617196401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,65536,0.1124831994374593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,1024,0.008025600016117096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,65536,0.25099093119303384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,2048,0.010709333419799804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,16384,0.03431466817855835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,16384,0.07305173079172769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,12288,0.02807146708170573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,12288,0.05519786675771078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,10240,0.026791467269261675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,768,0.007106133302052816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,10240,0.045560534795125326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,8192,0.01939520041147868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,8192,0.037604268391927084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,256,0.0058442667126655575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,7168,0.017326933145523072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,128,0.005341866612434387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,7168,0.03434880177179973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,1024,512,0.00639466643333435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,6144,0.015254400173823037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,6144,0.03107306758562724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,5120,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,5120,0.030830933650334673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,12288,0.042003198464711504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,4096,0.010808533430099488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,4096,0.02648426691691081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,65536,0.23482559521993002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,16384,0.057175465424855555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,3584,0.009960533181826273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,3584,0.02449280023574829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,3072,0.00895146628220876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,10240,0.03620800177256266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,3072,0.02367146611213684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,2560,0.01139306624730428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,2560,0.02184213399887085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,8192,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,6144,0.02281493345896403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,2048,0.009604266285896302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,2048,0.020819199085235596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,1536,0.007993599772453308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,7168,0.025711999336878462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,1536,0.01917333404223124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,1024,0.006073600053787232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,1024,0.017526400089263917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,5120,0.01972800095876058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,768,0.005389866729577383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,768,0.016714666287104288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,3072,0.01316159963607788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,4096,0.016177067160606386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,512,0.004488533238569895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,512,0.0172437330087026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,256,0.003736533224582672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,256,0.015749333302179973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,3584,0.01455573340257009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,128,0.003357866654793421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,128,0.015612799922625223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,2560,0.011872000495592753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,64,0.0031957333286603295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,64,0.015273599823315939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,768,32,0.0031850665807724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,768,32,0.01530239979426066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,65536,0.07849493026733398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,65536,0.23810985883076988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,1536,0.008892800410588582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,16384,0.027134933074315387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,16384,0.0695520003636678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,1024,0.007901866734027863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,2048,0.010618666807810467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,12288,0.02123840053876241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,12288,0.05093866586685181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,10240,0.019822933276494346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,768,0.007117866476376851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,10240,0.0425002654393514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,8192,0.016761600971221924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,8192,0.03579839865366618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,256,0.0056618665655454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,512,0.00629013329744339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,7168,0.015124266346295675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,7168,0.032025599479675294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,6144,0.013633066415786743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,768,128,0.0052704001466433205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,6144,0.02924586733182271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,5120,0.012242133418718975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,5120,0.025923200448354083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,65536,0.20817813873291016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,4096,0.01009386678536733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,4096,0.023834667603174844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,12288,0.040445868174235025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,3584,0.00899733304977417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,16384,0.052538665135701504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,3584,0.02244053284327189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,3072,0.008239999910195668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,3072,0.021421867609024047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,10240,0.0342741330464681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,2560,0.0072053333123524976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,2560,0.020331732432047524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,8192,0.02784000039100647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,6144,0.021303466955820718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,2048,0.0066442668437957765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,2048,0.018956800301869713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,7168,0.02471253275871277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,1536,0.005490133166313171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,1536,0.01907520095507304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,1024,0.004673066735267639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,1024,0.017058134078979492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,5120,0.018233599265416463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,768,0.004229333500067393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,768,0.016471466422080992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,4096,0.015416533748308817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,512,0.0037791999677817024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,512,0.01627413332462311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,3072,0.012805333733558655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,256,0.00348693331082662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,256,0.015318399667739869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,3584,0.013954133788744608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,128,0.0031338666876157125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,128,0.014963199694951376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,2560,0.011597866813341778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,64,0.0030559999247392017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,1536,0.008496000369389852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,64,0.014867200454076131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,512,32,0.0031637333333492277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,512,32,0.015284267067909241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,65536,0.05758293469746908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,2048,0.010006399949391682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,65536,0.22428693771362304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,1024,0.0077450667818387345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,16384,0.016007467110951742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,16384,0.06283946832021078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,12288,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,12288,0.04710293213526408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,10240,0.013726933797200521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,10240,0.03973439931869507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,768,0.006821333368619282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,8192,0.011969066659609477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,512,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,8192,0.0329749325911204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,256,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,7168,0.011136000355084736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,7168,0.03149333397547404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,512,128,0.005186133086681366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,6144,0.012000000476837159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,6144,0.028436267375946046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,5120,0.01020906666914622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,5120,0.025668267409006757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,65536,0.171833594640096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,4096,0.008653866251309712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,16384,0.04507840077082316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,12288,0.034481068452199296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,4096,0.023171200354894003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,3584,0.007916800181070964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,3584,0.02204266587893168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,3072,0.007157333195209503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,10240,0.03012053370475769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,3072,0.021160533030827842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,2560,0.006769066552321117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,2560,0.01967039903004964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,8192,0.023521065711975098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,2048,0.005980800092220307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,2048,0.0191103994846344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,7168,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,1536,0.005246933301289876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,1536,0.017953066031138103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,6144,0.019029333194096883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,1024,0.004506666461626689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,1024,0.016694400707880655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,5120,0.016424533724784852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,768,0.004161066561937332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,768,0.016531200210253397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,4096,0.014131200313568116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,512,0.0037493333220481873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,512,0.016153599818547568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,3072,0.011780266960461933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,256,0.003310933212439219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,256,0.015150933464368185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,3584,0.013059199849764506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,128,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,128,0.015049599607785544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,2560,0.010700800021489461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,64,0.002867199977238973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,64,0.014929067095120749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,256,32,0.002922666569550832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,256,32,0.014754133423169455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,65536,0.04747946659723918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,65536,0.21686719258626302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,2048,0.009113599856694538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,16384,0.010744532942771912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,16384,0.05791039864222208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,768,0.006437333424886067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,12288,0.009276800354321798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,1024,0.007101866602897644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,12288,0.04130239884058635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,1536,0.00839573343594869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,10240,0.01032319962978363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,10240,0.03587520122528076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,8192,0.009032533566157023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,8192,0.030696533123652142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,512,0.0058559998869895935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,256,0.005305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,7168,0.00865066647529602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,7168,0.03160106738408407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,256,128,0.005011199911435445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,6144,0.008083199958006541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,6144,0.02770026723543803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,5120,0.007949866851170858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,5120,0.02542720039685567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,65536,0.167523193359375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,4096,0.0072053333123524976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,4096,0.023064533869425453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,16384,0.04517333507537842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,3584,0.007673599819342296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,3584,0.02207146684328715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,12288,0.034433066844940186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,3072,0.007010133564472198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,10240,0.029054933786392213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,3072,0.020718934138615926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,2560,0.0062613333264986675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,2560,0.01986773411432902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,8192,0.023357866207758586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,2048,0.00552106648683548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,2048,0.01845653255780538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,7168,0.021296000480651854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,1536,0.004917333523432413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,6144,0.018722132841746012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,1536,0.018787199258804323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,1024,0.004149333387613296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,1024,0.017240534226099648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,5120,0.016268799702326454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,768,0.00388373335202535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,768,0.016293332974116007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,4096,0.013754666845003764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,512,0.00346666673819224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,512,0.015614933768908181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,3072,0.011595732967058818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,256,0.00315733328461647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,3584,0.012796800335248312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,256,0.015289599696795145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,2048,0.008881066242853801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,128,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,128,0.014974932869275412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,2560,0.010605866710344952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,64,0.0027583998938401537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,64,0.01502293348312378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,1024,0.0071487997968991595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,128,32,0.002794666588306427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,768,0.006397866706053417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,128,32,0.01483519971370697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,65536,0.043119998772939046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,16384,0.009339732925097148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,12288,0.007929599781831106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,1536,0.008252800007661184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,16384,0.05589439868927002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,65536,0.2155914624532064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,10240,0.009113599856694538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,12288,0.039496533075968426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,10240,0.034050134817759196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,8192,0.008055466910203297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,8192,0.029315199454625445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,7168,0.007639466722806294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,6144,0.007154133419195812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,7168,0.028376533587773638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,5120,0.0066431999206542965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,6144,0.02661120096842448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,5120,0.02532586654027303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,4096,0.0063381334145863845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,3584,0.006618666648864746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,4096,0.023162666956583658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,3584,0.021875200668970744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,3072,0.006345599889755249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,3072,0.020593067010243736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,2560,0.006154666841030121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,2048,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,2560,0.020350933074951172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,2048,0.018357332547505698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,1536,0.004792533318201701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,1536,0.017754666010538735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,1024,0.004106666644414266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,1024,0.017129600048065186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,768,0.0038431999584039056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,768,0.016614400347073875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,512,0.0034730667869249977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,512,0.015825066963831583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,256,0.0031498665610949195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,256,0.014968533317248026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,128,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,128,0.014798933267593383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,64,0.0027776000400384264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,64,0.014696533481280008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,64,32,0.0028394666810830434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,64,32,0.01455893317858378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,65536,0.040749867757161454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,16384,0.008654933174451191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,16384,0.05421760082244873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,65536,0.21325546900431314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,12288,0.008072533210118612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,10240,0.007474133372306823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,12288,0.037964800993601486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,512,0.005716266731421152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,8192,0.0068234667181968685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,10240,0.03339626789093018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,7168,0.006297599772612255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,8192,0.02918826738993327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,7168,0.028318933645884198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,6144,0.006099199752012888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,6144,0.026419200499852497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,4096,0.022728532552719116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,5120,0.0064533332983652755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,4096,0.0060362666845321655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,5120,0.02521599928538005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,3584,0.006427733103434245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,3072,0.006180266539255777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,3584,0.021489065885543824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,2560,0.006050133208433787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,3072,0.02062826752662659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,2560,0.02050666610399882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,2048,0.005350400010744731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,2048,0.01861226757367452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,1536,0.004748799900213877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,1024,0.01652479966481527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,768,0.0036703998843828833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,1024,0.004049066702524821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,1536,0.018367999792099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,512,0.003339733431736628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,256,0.005272533496220907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,768,0.016354133685429893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,512,0.015731199582417806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,256,0.003054933249950409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,256,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,128,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,64,0.0027317332724730173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,128,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,64,0.014502400159835815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,768,128,128,0.004938666522502899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,768,32,32,0.00264533335963885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,768,32,32,0.014407466848691305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,12288,0.580884297688802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,16384,0.794537607828776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,10240,0.8874144236246744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,12288,1.0544661204020183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,16384,1.4642475128173829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,10240,0.5226815859476726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,8192,0.3975488026936849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,8192,0.7287754694620768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,7168,0.3584202766418457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,7168,0.6113983790079753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,6144,0.3124810536702474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,6144,0.5427306493123372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,5120,0.4649834632873535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,5120,0.2542293389638265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,4096,0.35830294291178383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,12288,0.8350165049235025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,16384,1.0815935770670573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,4096,0.20971199671427407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,3584,0.31656001408894857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,10240,0.7086015701293945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,3584,0.1878698666890462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,3072,0.16499093373616536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,8192,0.5375818888346354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,3072,0.28496853510538733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,2560,0.16466879844665527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,2560,0.23980053265889487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,7168,0.46953706741333007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,2048,0.19308373133341472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,2048,0.1325824022293091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,2048,0.15118506749471028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,1536,0.14550612767537435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,1536,0.10305493672688801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,1024,0.10313920180002849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,6144,0.4216906547546387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,1024,0.07822399934132894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,768,0.0826848030090332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,768,0.06755946477254232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,5120,0.34837118784586585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,512,0.0667423963546753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,512,0.05790079832077026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,512,0.06583146651585897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,256,0.04394133488337199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,4096,0.27918612162272133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,256,0.05013546546300253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,128,0.0366485317548116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,3584,0.24650880495707192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,128,0.04499413172403972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,64,0.031922133763631184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,64,0.04611093203226725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,65536,32,0.03161279956499736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,65536,32,0.04756906827290853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,3072,0.20916694005330405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,2560,0.18074347178141276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,1536,0.11567893028259277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,1024,0.0886122703552246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,65536,0.8494058609008789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,16384,0.34677972793579104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,16384,0.2120426654815674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,768,0.08111573060353597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,65536,1.570248540242513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,65536,1.0787391662597656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,12288,0.16387839317321778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,12288,0.26188799540201824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,10240,0.25089386304219563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,10240,0.17994027137756347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,8192,0.1820543924967448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,8192,0.13274453481038412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,7168,0.1554207960764567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,256,0.05603306690851847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,7168,0.1108671983083089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,65536,128,0.051787734031677246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,6144,0.1337824026743571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,6144,0.09742720127105713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,5120,0.11257706483205158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,5120,0.0853109359741211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,4096,0.09193387031555175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,4096,0.06862506866455079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,16384,0.26687040328979494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,3584,0.08282346725463867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,12288,0.20564799308776854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,3584,0.060983467102050784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,10240,0.1819935957590739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,3072,0.0700821320215861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,3072,0.054757332801818846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,8192,0.13128639856974283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,2560,0.05866346756617228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,2560,0.04933013518651326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,7168,0.1156160036722819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,2048,0.04992106755574544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,6144,0.10538986523946126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,2048,0.04409279823303223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,1536,0.038378667831420896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,1536,0.03751360177993775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,1024,0.028075732787450153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,5120,0.08825066884358725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,1024,0.031464533011118574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,768,0.023081600666046143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,768,0.028500266869862872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,4096,0.06935253143310546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,512,0.017167999347050985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,3584,0.06279253164927165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,512,0.025222400824228924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,3072,0.053889067967732754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,256,0.01211199959119161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,2560,0.047866666316986085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,256,0.022261333465576173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,128,0.009406933188438415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,2048,0.04002026716868083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,128,0.020694400866826376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,64,0.007889066636562348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,1536,0.033890132109324136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,64,0.020546134312947592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,16384,32,0.008242133259773254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,16384,32,0.02065599958101908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,1024,0.02685439984003703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,768,0.024077866474787393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,16384,0.26862506866455077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,65536,0.6860373179117839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,16384,0.17394026120503742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,65536,1.0859210968017579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,12288,0.13496425946553547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,12288,0.20457280476888023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,10240,0.16992106437683105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,10240,0.11583147048950196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,8192,0.13720533053080242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,8192,0.09665706952412924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,512,0.02020053267478943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,7168,0.12109546661376953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,256,0.017704532543818156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,7168,0.08548906644185385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,16384,128,0.015794133146603904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,6144,0.10481386979420979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,6144,0.0766752004623413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,5120,0.08936320145924886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,5120,0.06751039822896322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,10240,0.13654932975769044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,4096,0.07032106717427572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,4096,0.05660586754480997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,12288,0.15836052894592284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,3584,0.062320001920064295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,3584,0.05110186735788981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,65536,0.8514538447062174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,16384,0.20208427111307778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,3072,0.05751573244730631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,3072,0.046462933222452804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,2560,0.046486401557922365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,5120,0.06553706725438437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,2560,0.04121599992116292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,2048,0.038101331392923994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,7168,0.08886400063832602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,2048,0.036849065621693926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,1536,0.02972266674041748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,1536,0.03132479985555013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,8192,0.1013983964920044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,1024,0.02132586638132731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,1024,0.0270687997341156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,6144,0.078875732421875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,768,0.017293866475423178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,768,0.025085866451263428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,2560,0.03652906815210978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,512,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,512,0.02258560061454773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,4096,0.05252373218536377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,256,0.00969599982102712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,256,0.020169599850972494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,2048,0.03167999982833862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,128,0.007336533566315968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,128,0.018639999628067016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,3584,0.048207998275756836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,64,0.006567466755708058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,64,0.019064533710479736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,12288,32,0.006692266464233399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,12288,32,0.019449599583943687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,1536,0.02677333354949951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,3072,0.04141226609547933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,768,0.01927466591199239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,65536,0.5794442494710286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,65536,0.8727914810180664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,16384,0.22128213246663414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,16384,0.1612885316212972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,12288,0.1675125281016032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,12288,0.12177493572235107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,10240,0.14047147432963053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,10240,0.10270613034566242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,1024,0.021011199553807577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,8192,0.11226133505503337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,8192,0.08415573438008626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,128,0.012743467092514038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,256,0.01404159963130951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,7168,0.09843520323435465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,7168,0.07589653333028158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,6144,0.08543039957682291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,12288,512,0.01641386648019155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,6144,0.0678879976272583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,5120,0.0723359982172648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,5120,0.059180800120035806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,12288,0.13704106012980144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,4096,0.05895466804504394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,4096,0.05108160177866618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,16384,0.17797867457071942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,65536,0.7113994598388672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,3584,0.05559573173522949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,3584,0.04935893217722575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,10240,0.11823999881744385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,3072,0.046204801400502524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,3072,0.04312853415807088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,6144,0.0707530657450358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,8192,0.09159253438313802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,2560,0.03973759810129802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,2560,0.038209064801534014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,2048,0.03269333243370056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,2048,0.03347413142522176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,7168,0.08106559912363688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,1536,0.026495999097824095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,1536,0.029275733232498168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,1024,0.018498132626215615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,1024,0.025389866034189863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,5120,0.058746667702992764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,768,0.01527466674645742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,768,0.023944532871246337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,4096,0.04926400184631348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,512,0.012213333447774252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,512,0.022270933787027995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,3072,0.037876268227895096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,256,0.008835200468699138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,256,0.019163733720779418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,3584,0.043859199682871504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,1536,0.023460266987482707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,128,0.007122133175532024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,128,0.0184714674949646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,64,0.006201600035031637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,64,0.018458666404088338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,10240,32,0.0065301333864529925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,10240,32,0.018802134195963542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,2560,0.03484799861907959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,65536,0.4867712020874023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,65536,0.7098826726277669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,16384,0.1964970588684082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,1024,0.01995519995689392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,2048,0.028525867064793903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,16384,0.14687786102294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,12288,0.13176106611887614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,12288,0.11431252956390381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,768,0.0165173331896464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,10240,0.11002346674601238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,10240,0.08452479839324951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,8192,0.088427734375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,8192,0.07077120145161947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,256,0.011636267105738323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,7168,0.07897066275278727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,512,0.014342400431632995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,7168,0.0642911990483602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,6144,0.06861866315205892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,6144,0.057523198922475184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,10240,128,0.010469333330790202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,5120,0.05722986857096354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,16384,0.13922773996988932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,5120,0.050543999671936034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,65536,0.569816525777181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,4096,0.04634773333867391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,12288,0.1094325304031372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,4096,0.04736959934234619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,3584,0.04123520056406657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,3584,0.042049066225687666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,10240,0.09325013160705567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,3072,0.03679573138554891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,3072,0.03635306755701701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,8192,0.06561919848124186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,2560,0.030858665704727173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,2560,0.03272639910380046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,2048,0.025624533494313557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,2048,0.029389866193135578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,7168,0.05949973265329996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,1536,0.02029013236363729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,1536,0.026245333751042682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,6144,0.053711998462677005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,1024,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,1024,0.023076266050338745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,5120,0.04527999957402547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,768,0.012754133343696595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,768,0.022035199403762817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,4096,0.038889598846435544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,512,0.010335999727249145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,3584,0.03351999918619792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,512,0.019780266284942626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,512,0.012545067071914672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,256,0.007607466479142506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,256,0.01840533415476481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,3072,0.029789867003758748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,128,0.006151466568311056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,128,0.017576533555984496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,64,0.005362133185068766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,2560,0.025906133651733398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,64,0.017861332496007284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,8192,32,0.005634133517742157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,8192,32,0.017903999487559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,2048,0.022870399554570518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,65536,0.4630133310953776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,65536,0.6474688212076823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,16384,0.16598079999287924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,16384,0.1287274678548177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,1536,0.01949653426806132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,12288,0.12309226989746094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,12288,0.09436480204264322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,1024,0.01611306667327881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,10240,0.10392213662465413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,768,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,10240,0.08185173670450846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,8192,0.08399999936421712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,256,0.010496000448862713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,8192,0.06867199738820394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,8192,128,0.009954133629798889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,7168,0.07408959865570068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,7168,0.06210773388544718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,6144,0.061520000298817955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,6144,0.05640639861424764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,65536,0.48515841166178386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,5120,0.056111999352773036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,5120,0.0560149351755778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,16384,0.12448853651682537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,4096,0.04318399826685588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,4096,0.03998613357543945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,3584,0.03652266661326091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,12288,0.09498026371002197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,3584,0.03676799933115642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,3072,0.031464533011118574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,3072,0.03300586740175883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,10240,0.07947946389516194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,2560,0.026971733570098876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,2560,0.02995520035425822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,8192,0.0660533348719279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,2048,0.022326399882634483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,7168,0.05788799921671549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,2048,0.027036799987157183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,2048,0.021425066391626994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,1536,0.017698132991790773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,1536,0.024552534023920693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,1024,0.013018666704495748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,1024,0.021859200795491536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,6144,0.04962986707687378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,768,0.010598400235176086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,768,0.020383999745051066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,5120,0.0437173326810201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,4096,0.03587840000788371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,512,0.008532266815503438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,512,0.01850773294766744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,256,0.006180266539255777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,256,0.01836586594581604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,3584,0.03230080008506775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,256,0.008729599912961324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,128,0.0053610667586326596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,128,0.017198934157689413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,64,0.004931200047334035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,3072,0.027584000428517656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,64,0.017398399114608765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,7168,32,0.005178666611512502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,7168,32,0.017717333634694417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,2560,0.025075199206670125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,1536,0.017516799767812095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,65536,0.5290677388509114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,65536,0.401144536336263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,16384,0.13469120661417644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,16384,0.10609493255615235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,1024,0.015042133132616677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,12288,0.10204799969991048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,12288,0.08386666774749756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,10240,0.0857589324315389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,768,0.012319999933242797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,10240,0.07630080382029215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,8192,0.0688927968343099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,8192,0.062057598431905114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,7168,0.06079039971033732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,7168,0.05572160085042318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,7168,0.050411732991536465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,512,0.010405332843462626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,6144,0.05230186780293783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,6144,0.05008213520050049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,5120,0.045354668299357095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,7168,128,0.007855999966462452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,5120,0.04402773380279541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,4096,0.036880000432332354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,4096,0.03810986677805583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,65536,0.5028501192728678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,16384,0.11205333073933918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,3584,0.033557331562042235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,3584,0.034825599193573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,3584,0.029415466388066608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,3072,0.027956267197926838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,12288,0.0847978671391805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,3072,0.03271786570549011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,2560,0.02407039999961853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,10240,0.08231893380482992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,2560,0.029107199112574263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,2560,0.022603732347488404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,2048,0.020027732849121092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,2048,0.026745599508285523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,1536,0.016040533781051636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,8192,0.05766079823176066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,1536,0.024344533681869507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,1024,0.012122666835784912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,1024,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,6144,0.04478293259938558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,768,0.010455466310183207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,768,0.020358399550120036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,5120,0.03799039920171102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,512,0.008516266942024231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,512,0.018694400787353516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,4096,0.0319872001806895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,512,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,256,0.006203733384609222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,256,0.01734506686528524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,128,0.005173333485921224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,128,0.016793600718180337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,3072,0.024784000714619954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,64,0.004709333181381226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,64,0.016755199432373045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,2048,0.019268266359965005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,6144,32,0.004950400193532308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,1536,0.016456533471743265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,6144,32,0.016888533035914102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,1024,0.014221866925557455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,16384,0.11460373401641846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,65536,0.448246415456136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,65536,0.37045332590738933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,16384,0.09600853125254313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,12288,0.09213333129882813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,12288,0.07563947041829427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,10240,0.07331199645996093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,10240,0.06584213177363077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,8192,0.05922453403472901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,768,0.011534933249155681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,8192,0.05569279988606771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,7168,0.052294401327768955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,7168,0.051062401135762533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,256,0.008563199639320373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,6144,0.046133331457773846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,6144,0.046401067574818926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,6144,128,0.007912533481915791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,5120,0.038578132788340255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,5120,0.04082346757253011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,12288,0.08120960394541422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,4096,0.03125759959220886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,4096,0.03493760029474895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,16384,0.11043306986490886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,3584,0.028248532613118486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,3584,0.032314666112263996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,65536,0.4634154637654622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,6144,0.04027093251546224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,10240,0.06730666955312094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,3072,0.025521065791447955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,3072,0.030024532477060956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,2560,0.021605332692464195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,2560,0.02763200004895528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,2048,0.01798506577809652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,2048,0.02609279950459798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,7168,0.045748265584309895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,1536,0.014867200454076131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,1536,0.023805866638819374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,8192,0.05187626679738363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,1024,0.011597866813341778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,1024,0.021280000607172646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,5120,0.034756267070770265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,768,0.01013866662979126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,768,0.019898666938145956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,3072,0.02248959938685099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,512,0.008026666442553202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,512,0.018769067525863648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,4096,0.02905813256899516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,256,0.006313600142796834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,256,0.017927465836207072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,1536,0.015012266238530478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,128,0.0050570666790008545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,128,0.017118932803471883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,2560,0.02079253395398458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,64,0.004570666452248891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,64,0.016731733083724977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,5120,32,0.0047989333669344585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,3584,0.026369067033131917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,5120,32,0.01671573321024577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,2048,0.01780160069465637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,1024,0.012739200393358865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,16384,0.08863999843597412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,65536,0.3553215980529785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,16384,0.08282132943471274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,65536,0.3102890650431315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,12288,0.06747413476308187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,12288,0.06661866505940756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,10240,0.0573365330696106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,768,0.010577066739400228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,10240,0.057811200618743896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,8192,0.04646079937616984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,8192,0.049290664990743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,256,0.007810133198897044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,7168,0.0404693325360616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,128,0.007285333176453908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,7168,0.04485973517100016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,5120,512,0.009103999535242716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,6144,0.03517546653747559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,6144,0.04043413400650024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,5120,0.030013867219289142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,5120,0.0361194650332133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,10240,0.05209920008977255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,4096,0.02480213244756063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,4096,0.030560000737508135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,65536,0.30540374120076497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,3584,0.022309333086013794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,12288,0.05999146699905396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,3584,0.028642133871714277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,3072,0.01941439906756083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,3072,0.026739199956258137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,16384,0.07379413445790609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,2560,0.01653333306312561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,2560,0.024864000082015992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,8192,0.03778986533482869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,2048,0.01418346663316091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,2048,0.023918932676315306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,7168,0.034355199337005614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,1536,0.011708799997965496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,1536,0.021684267123540244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,6144,0.03099306623140971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,1024,0.009144533673922222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,1024,0.01904426614443461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,5120,0.0256223996480306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,768,0.007811200122038524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,768,0.018345600366592406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,2560,0.01607039968172709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,512,0.006157866617043813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,512,0.0179584006468455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,4096,0.022537599007288613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,256,0.005017599960168203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,256,0.016516266266504924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,3584,0.020129066705703736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,128,0.004402133325735727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,128,0.016275200247764587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,3072,0.01789120038350423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,2048,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,64,0.004062933226426443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,64,0.016025599837303162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,4096,32,0.004267733295758565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,4096,32,0.016177067160606386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,65536,0.3161237398783366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,65536,0.2878922780354818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,16384,0.07985813617706299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,16384,0.0775264024734497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,1536,0.01275200049082438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,12288,0.05991573333740234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,12288,0.061945601304372155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,768,0.009314133723576864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,1024,0.010123733679453533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,10240,0.0499232014020284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,256,0.007524266839027405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,10240,0.055613867441813146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,8192,0.04101333220799764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,8192,0.046225066979726157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,7168,0.03579946756362915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,7168,0.04177920023600261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,512,0.008266666531562805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,6144,0.031446399291356404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,6144,0.03683520158131917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,4096,128,0.0071829333901405334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,5120,0.026771199703216553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,5120,0.032638933261235556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,12288,0.05332906643549601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,4096,0.021917865673700968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,4096,0.029048534234364827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,65536,0.2762506802876791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,3584,0.02037973403930664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,16384,0.07106239795684814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,3584,0.02736746668815613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,3072,0.017459199825922648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,3072,0.025388799111048382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,6144,0.029076266288757324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,2560,0.014854400356610616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,2560,0.024037333329518636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,10240,0.04540266593297322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,2048,0.012800000111262002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,2048,0.022382932901382446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,7168,0.033214932680130003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,1536,0.01066986620426178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,1536,0.02008533279101054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,8192,0.037928533554077146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,1024,0.008307200173536937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,1024,0.018888533115386963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,5120,0.025707733631134034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,768,0.006899199883143107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,768,0.017977599302927652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,3072,0.017414400974909462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,512,0.005795200169086456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,512,0.017288533846537273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,4096,0.021261866887410483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,256,0.004716800153255462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,256,0.016214399536450704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,3584,0.01994880040486654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,128,0.004247466723124186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,128,0.016013866662979125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,1536,0.011941333611806233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,64,0.0038304001092910765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,2560,0.01556373337904612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,64,0.01586133340994517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3584,32,0.004021333406368891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3584,32,0.015884799758593242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,65536,0.27364158630371094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,65536,0.27617174784342446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,2048,0.01378986636797587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,16384,0.06855680147806803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,16384,0.07258240381876627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,768,0.008141866823037466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,12288,0.05207146803538004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,1024,0.00967680017153422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,12288,0.058754134178161624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,10240,0.0437610665957133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,10240,0.05160426696141561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,256,0.006514133512973785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,8192,0.03577599922815959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,8192,0.04297066529591878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,512,0.007342933118343354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,7168,0.03218773404757182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,7168,0.038819201787312824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,6144,0.02808319926261902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,6144,0.03472426732381185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3584,128,0.0061034664511680605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,5120,0.024977066119511924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,5120,0.030723200241724653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,12288,0.04743680159250895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,4096,0.019211733341217042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,65536,0.2622901280721029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,4096,0.027672533194224042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,3584,0.0171615997950236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,3584,0.02622293432553609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,16384,0.062422398726145426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,3072,0.015254400173823037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,3072,0.02518293261528015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,6144,0.026444800694783527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,2560,0.013406933347384135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,2560,0.022702932357788086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,10240,0.04066559871037801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,2048,0.011426132917404175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,2048,0.022366933027903237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,7168,0.029819732904434203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,1536,0.009551999966303508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,1536,0.019447465737660728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,8192,0.03376213312149048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,1024,0.007331199944019318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,1024,0.018367999792099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,5120,0.02319999933242798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,768,0.006353066861629486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,768,0.017621332406997682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,3072,0.01593386630217234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,512,0.005388799806435903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,512,0.016883200407028197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,4096,0.019749333461125694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,256,0.004580266773700714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,256,0.015964800119400026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,1536,0.010803199807802836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,128,0.003977599988381068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,128,0.015803733468055726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,3584,0.01819733381271362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,2560,0.014518400033315023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,64,0.0037759999434153237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,64,0.01560533344745636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,3072,32,0.0038880000511805216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,3072,32,0.01576746702194214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,65536,0.23283626238505045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,65536,0.2582389354705811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,16384,0.060679467519124355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,16384,0.06730666955312094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,1024,0.00904960036277771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,2048,0.0126720001300176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,12288,0.05078933238983154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,12288,0.059837865829467776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,10240,0.048053332169850665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,10240,0.05190080006917318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,768,0.007816533247629803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,8192,0.031729066371917726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,8192,0.03926506837209066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,256,0.006552533308664958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,128,0.006028800209363302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,7168,0.033080534140268965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,7168,0.036329599221547444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,3072,512,0.007178666690985362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,6144,0.029127466678619384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,6144,0.03503359953562419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,5120,0.026258132855097455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,5120,0.0315829336643219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,12288,0.04752000172932942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,4096,0.017729065815607705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,4096,0.0282368004322052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,65536,0.23422187169392905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,3584,0.01655359963575999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,16384,0.058507732550303136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,3584,0.025172267357508344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,3072,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,3072,0.023652267456054688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,10240,0.037698133786519365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,2560,0.013091199596722922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,2560,0.02233920097351074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,6144,0.023906133572260537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,2048,0.011069867014884948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,8192,0.03142506678899129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,2048,0.02072319984436035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,7168,0.02776106595993042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,1536,0.009157333771387737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,1536,0.019397334257761637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,5120,0.021399466196695964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,1024,0.006648533542950948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,1024,0.01791680057843526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,768,0.00590826670328776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,768,0.017565866311391197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,3072,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,512,0.00513919989267985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,512,0.016888533035914102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,4096,0.017927465836207072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,256,0.004287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,256,0.01609813372294108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,3584,0.017003732919692992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,128,0.0039327998956044516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,128,0.015369600057601929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,2560,0.013521066308021546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,64,0.0035584000249703727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,64,0.01552959978580475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2560,32,0.0036746665835380556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2560,32,0.015599999825159708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,65536,0.1814858595530192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,65536,0.22734293937683106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,1536,0.009916800260543823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,16384,0.05203093290328979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,16384,0.06785493691762289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,2048,0.01188053290049235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,12288,0.05132160186767578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,12288,0.056898132960001624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,768,0.007354666789372762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,256,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,1024,0.008370133241017659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,10240,0.043057068188985186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,10240,0.04969813426335652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,512,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,8192,0.031972267230351764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,8192,0.037989334265391035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,7168,0.028500266869862872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,7168,0.035548798243204754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,6144,0.027628799279530842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,6144,0.03287893335024516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2560,128,0.005707733333110809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,5120,0.02253119945526123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,5120,0.029292800029118854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,65536,0.23674987157185873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,4096,0.017590399583180746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,4096,0.026073600848515826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,16384,0.05721280177434286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,3584,0.016782933473587038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,3584,0.024570665756861367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,12288,0.04505279858907064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,3072,0.015067733327547708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,3072,0.0230240007241567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,10240,0.03971306482950847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,2560,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,8192,0.030933332443237305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,2560,0.02118399937947591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,2048,0.010790399710337321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,2048,0.019814399878184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,7168,0.026715733607610065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,6144,0.024221867322921753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,1536,0.008939733107884724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,1536,0.01874986688296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,1024,0.007030400137106578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,1024,0.017679999272028603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,5120,0.020779732863108316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,768,0.005619200070699056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,768,0.017157334089279174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,4096,0.01776426633199056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,512,0.004828799764315287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,512,0.016056533654530844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,3072,0.014574933052062988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,256,0.004107733319203059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,3584,0.016247466206550598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,256,0.015270400047302245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,128,0.003671466559171677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,128,0.015340800086657206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,768,0.007192533214886982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,64,0.003420799970626831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,2048,0.0108106662829717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,1024,0.008098133405049642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,64,0.015221333503723145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,2048,32,0.0035135999321937563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,2048,32,0.015140266219774882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,65536,0.14432640075683595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,65536,0.20630720456441246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,16384,0.04153279860814412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,16384,0.06470613479614258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,12288,0.042190933227539064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,12288,0.05246719916661581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,2560,0.013064533472061157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,10240,0.036892799536387126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,10240,0.04586133162180583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,1536,0.00942186713218689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,8192,0.027951999505360918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,8192,0.03667519887288411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,512,0.006904533505439759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,7168,0.02606719930966695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,7168,0.03338559865951538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,256,0.006318933268388112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,2048,128,0.005915733178456625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,6144,0.021977599461873373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,6144,0.03076159954071045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,12288,0.037811199824015304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,16384,0.04890880187352498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,5120,0.01959999998410543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,65536,0.21627626419067383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,5120,0.02813120086987813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,4096,0.015384533007939658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,4096,0.02523733377456665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,3584,0.01358080009619395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,3584,0.02333546678225199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,3072,0.012993066509564718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,3072,0.02215253313382467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,10240,0.03385920127232869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,2560,0.010912000139554342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,2560,0.02221440076828003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,8192,0.025718400875727337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,2048,0.009342933694521587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,2048,0.019399466117223103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,6144,0.020600533485412596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,1536,0.007727999985218048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,1536,0.01821546753247579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,7168,0.023484800259272257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,1024,0.0058218667904535925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,1024,0.017484800020853678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,4096,0.01541866660118103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,768,0.005089066425959269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,3072,0.012425600488980611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,768,0.016603733102480568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,512,0.004273066421349844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,512,0.01575040022532145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,3584,0.014528000354766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,5120,0.018074667453765868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,256,0.0036746665835380556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,2560,0.011010133226712545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,256,0.015525333086649575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,128,0.0033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,128,0.01546346644560496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,64,0.0031370667119820913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,64,0.01514240006605784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1536,32,0.0033215999603271483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1536,32,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,65536,0.0967957337697347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,65536,0.18427093823750812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,2048,0.009551999966303508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,1536,0.008666666348775227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,16384,0.031070933739344282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,16384,0.05886293252309164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,12288,0.034626134236653644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,12288,0.045431466897328694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,1024,0.0074314668774604796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,10240,0.03215893308321635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,10240,0.040185598532358806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,128,0.005211733281612396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,8192,0.021027199427286782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,256,0.0054730668663978575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,8192,0.0332096000512441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,512,0.006121600170930227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,7168,0.01885439952214559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,7168,0.030305065711339313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,6144,0.01633280018965403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1536,768,0.006461866696675618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,6144,0.02877440055211385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,5120,0.014662399888038635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,5120,0.025678932666778564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,65536,0.19508372942606608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,4096,0.011924266815185547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,4096,0.024555732806523643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,16384,0.04160106579462687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,3584,0.010852266351381938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,3584,0.02299840052922567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,12288,0.03216853340466817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,3072,0.009947733084360758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,3072,0.02013333241144816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,10240,0.03280106584231059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,2560,0.008545066912968953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,2560,0.01959786613782247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,8192,0.02258239984512329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,2048,0.007260799904664357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,7168,0.02057066758473714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,2048,0.01914773384730021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,1536,0.005989333490530649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,1536,0.01748266617457072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,6144,0.018506666024525963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,5120,0.016101333498954772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,1024,0.004862933357556661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,1024,0.01647040049235026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,768,0.004403199752171834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,768,0.016275200247764587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,4096,0.013798399766286214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,512,0.004013866682847341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,512,0.015878400206565856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,3072,0.011366400122642516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,256,0.003525333354870478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,256,0.015202132860819497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,3584,0.012822399536768595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,128,0.0032757334411144257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,128,0.015027200182278952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,2560,0.010416000088055929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,2048,0.008913066983222962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,64,0.003101866692304611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,64,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,1024,32,0.0032458665470282235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,1024,32,0.01511360009511312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,768,0.006309333443641663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,1024,0.006916266679763794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,65536,0.07773973147074381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,16384,0.02632960081100464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,65536,0.1753013292948405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,16384,0.052614398797353114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,1536,0.008335999647776286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,12288,0.024251733223597208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,12288,0.04055253267288208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,10240,0.021959465742111207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,10240,0.035046398639678955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,8192,0.0167797327041626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,8192,0.030359466870625813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,512,0.006082133452097575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,7168,0.015105066696802774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,7168,0.028728532791137695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,256,0.0054175997773806255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,6144,0.013435733318328858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,6144,0.02595733404159546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,1024,128,0.005186133086681366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,5120,0.014299733440081277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,5120,0.025364265839258833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,12288,0.03297599951426188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,4096,0.01093226671218872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,4096,0.02384106715520223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,16384,0.044069333871205645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,65536,0.1699722607930501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,3584,0.009619200229644775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,3584,0.021371734142303467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,3072,0.008775466680526733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,3072,0.02067413330078125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,10240,0.028757333755493164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,2560,0.0077461332082748415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,2560,0.019977599382400513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,7168,0.021201066176096597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,2048,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,8192,0.02355626622835795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,2048,0.019397334257761637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,6144,0.01884160041809082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,1536,0.006129066646099091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,1536,0.01776640017827352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,1024,0.004657066861788432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,1024,0.016919465859731038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,5120,0.016173866391181946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,768,0.004304000238577525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,768,0.016350932916005454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,3072,0.010805333654085796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,4096,0.013454932967821756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,512,0.003668266783157984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,512,0.015737600127855935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,256,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,3584,0.01283519963423411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,256,0.015222400426864624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,128,0.003099733342727025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,128,0.014892799655596414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,2560,0.010223999619483948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,64,0.002942933390537898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,64,0.014825600385665893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,768,32,0.0030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,1536,0.008100266754627227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,768,32,0.015074132879575094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,65536,0.05878400007883707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,65536,0.16751999855041505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,16384,0.019642666975657145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,1024,0.007129600147406261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,16384,0.0485482652982076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,12288,0.01990613341331482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,2048,0.00902506709098816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,12288,0.039341866970062256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,10240,0.01770026683807373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,10240,0.034953598181406656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,768,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,8192,0.013969066739082336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,8192,0.02982719937960307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,256,0.005365333457787832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,7168,0.013285332918167114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,128,0.005075199902057648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,768,512,0.006018133461475372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,7168,0.027989333868026732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,6144,0.014144000411033631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,6144,0.026103466749191284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,5120,0.012582400441169738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,5120,0.02432960073153178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,65536,0.15789119402567547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,4096,0.008807466427485148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,4096,0.021835732460021972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,16384,0.03858986695607503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,3584,0.008903466661771138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,12288,0.030061866839726763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,3584,0.02071253259976705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,3072,0.008011733492215473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,3072,0.01958720088005066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,10240,0.025935999552408856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,2560,0.007256533205509186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,8192,0.021155200401941934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,2560,0.019492266575495402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,7168,0.01887680093447367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,2048,0.00647573322057724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,6144,0.016355199615160625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,2048,0.018424532810846963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,1536,0.005644799768924713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,5120,0.01452906628449758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,1536,0.01728960076967875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,1024,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,1024,0.016744534174601235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,768,0.004186666508515676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,768,0.016420267025629678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,4096,0.012261333068211873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,512,0.0037941334148248037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,512,0.015608533223470052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,3072,0.010377599795659383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,256,0.003389866650104523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,256,0.015020799636840821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,3584,0.012034133076667786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,128,0.0031008000175158186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,128,0.014456533392270408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,2560,0.009851732850074768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,64,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,64,0.014698666334152222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,512,32,0.002962133288383484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,512,32,0.014670933286348978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,65536,0.04248533248901367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,65536,0.1554986635843913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,1024,0.00684799998998642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,768,0.005986133217811584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,16384,0.013195733229319254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,2048,0.008397866288820903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,16384,0.04042559862136841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,16384,0.0382207989692688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,12288,0.01095146636168162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,12288,0.03248320023218791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,10240,0.010469333330790202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,10240,0.029819732904434203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,8192,0.00837546686331431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,1536,0.008040533463160197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,8192,0.02658240000406901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,7168,0.01018986701965332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,7168,0.025525333484013875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,512,0.005739733576774597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,6144,0.008407466610272725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,6144,0.024710400899251302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,256,0.005278933544953664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,5120,0.00856213370958964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,512,128,0.004936533172925314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,5120,0.02305493354797363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,4096,0.0077567999561627705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,4096,0.02220266660054525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,65536,0.13647146224975587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,3584,0.008190933366616566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,3584,0.020248534282048543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,3584,0.01193173329035441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,3072,0.007673599819342296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,3072,0.019215999046961467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,12288,0.029281065861384077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,2560,0.0064064001043637585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,2560,0.018988800048828126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,10240,0.024238934119542442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,2048,0.005629866818586985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,8192,0.019933867454528808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,2048,0.018526933590571084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,1536,0.004969599843025208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,7168,0.018071466684341432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,1536,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,1024,0.004266666869322458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,1024,0.016151466965675355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,6144,0.01607360045115153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,768,0.003953066716591517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,768,0.015875200430552162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,768,0.005959466596444448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,5120,0.014094932874043783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,512,0.0035391998787721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,512,0.015549866358439126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,256,0.0032117334504922234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,4096,0.012217600146929424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,256,0.01571626663208008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,128,0.003010133405526479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,128,0.0146506667137146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,3072,0.010150399804115296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,64,0.0028181334336598715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,64,0.014640000462532044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,256,32,0.002845866729815801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,2560,0.009678933024406432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,256,32,0.014386133352915446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,65536,0.03453546762466431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,2048,0.008295466502507527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,65536,0.15019307136535645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,16384,0.012689066926638284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,1536,0.007776000102361043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,16384,0.03718613386154175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,12288,0.008885332942008972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,12288,0.03183573285738627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,10240,0.00976746678352356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,1024,0.006614399949709575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,10240,0.028837333122889202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,512,0.005602133274078369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,8192,0.007885866860548655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,8192,0.026107732454935712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,256,0.005070933202902476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,7168,0.007480533421039581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,256,128,0.00487360010544459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,7168,0.025663999716440837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,6144,0.008405333757400513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,6144,0.023543467124303184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,16384,0.036584532260894774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,65536,0.1367743968963623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,5120,0.007719466586907704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,5120,0.023098667462666832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,4096,0.0070271998643875126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,12288,0.027876265843709308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,4096,0.022054400046666464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,3584,0.006742399930953979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,10240,0.023772799968719484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,3584,0.02165013353029887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,3584,0.011702400445938111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,3072,0.006423466900984447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,3072,0.019053866465886436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,2560,0.0061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,2560,0.018439465761184694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,2048,0.005471999943256378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,2048,0.018307199080785118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,8192,0.01988906661669413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,1536,0.004882133503754934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,7168,0.017953066031138103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,1536,0.01712426741917928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,1024,0.004181333382924398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,1024,0.016361600160598753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,6144,0.01605013310909271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,768,0.0038592000802357995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,768,0.015955199797948204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,5120,0.01402773360411326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,512,0.0035061334570248926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,512,0.015494400262832641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,4096,0.012033067146937053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,256,0.003124266614516576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,256,0.015174399813016257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,3072,0.010108799735705058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,128,0.002946133414904277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,128,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,2560,0.009305600325266521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,64,0.0028053333361943563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,64,0.014664533734321594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,2048,0.008019199967384339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,128,32,0.002921599894762039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,128,32,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,65536,0.03158506751060486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,1536,0.007755733529726664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,16384,0.007567999760309856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,65536,0.14729706446329754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,12288,0.007677866518497467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,16384,0.0358517328898112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,1024,0.006702933212121327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,10240,0.00714026689529419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,12288,0.030346665779749555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,10240,0.02759893337885539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,8192,0.006622933348019918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,8192,0.025384533405303954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,7168,0.006421333551406861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,7168,0.024308266242345174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,6144,0.006178133189678192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,768,0.0059456000725428265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,5120,0.00654720018307368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,5120,0.022757333517074586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,6144,0.023362133900324503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,4096,0.0061247999469439185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,3584,0.006503466765085857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,4096,0.02118399937947591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,3584,0.01993173360824585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,3072,0.006208000083764395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,2560,0.0187285323937734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,3072,0.01941759983698527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,2560,0.0060245335102081295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,2048,0.0053845331072807315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,1536,0.0047978664437929785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,2048,0.017350399494171144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,1536,0.01697173317273458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,1024,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,768,0.0037717332442601522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,1024,0.01606826682885488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,768,0.015870933731396995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,512,0.003386666625738144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,256,0.003036800026893616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,512,0.015503999590873719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,128,0.014897066354751586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,256,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,128,0.0028607999285062153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,64,0.0026975999275843303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,512,0.005621333420276642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,64,32,0.0027445333699385325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,64,0.014865066607793173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,64,32,0.014457600315411887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,65536,0.02942720055580139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,16384,0.007386666536331177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,65536,0.1478559970855713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,12288,0.006651733318964641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,16384,0.03480319976806641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,10240,0.027404799064000444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,12288,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,10240,0.006497066716353099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,8192,0.006389333307743073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,256,0.005053866902987162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,7168,0.0062496001521746315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,8192,0.025271467367808026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,6144,0.006025599936644236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,7168,0.024394667148590087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,512,128,128,0.004867200056711833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,6144,0.023060266176859537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,5120,0.006358399987220764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,5120,0.022460800409317017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,4096,0.005981866518656413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,3584,0.006401066482067108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,4096,0.02079040010770162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,3072,0.006116266548633576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,3584,0.020181334018707274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,3072,0.019322667519251505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,2560,0.006107733150323232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,2560,0.01824000080426534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,2048,0.005392000079154968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,2048,0.018222934007644652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,1536,0.004804266492525736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,1536,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,1024,0.004062933226426443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,1024,0.016167466839154564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,768,0.003756800045569738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,768,0.015687466661135355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,512,0.0033429334561030067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,256,0.003092266619205475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,512,0.015468800067901611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,256,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,128,0.002882133424282074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,128,0.014727466305096946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,64,0.0027146667242050173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,512,32,32,0.0026848000784715016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,64,0.014702933033307395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,512,32,32,0.014482133587201438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,12288,0.4637397448221843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,16384,0.6059146881103515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,10240,0.7052288055419922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,12288,0.8386986414591471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,16384,1.0828661600748697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,10240,0.39093332290649413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,8192,0.31738986968994143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,8192,0.5518431981404622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,8192,0.3949898719787598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,7168,0.27257814407348635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,7168,0.4933642705281575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,6144,0.4162421226501465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,6144,0.23612586657206217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,5120,0.3556565284729004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,5120,0.2092149257659912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,4096,0.2875253359476725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,4096,0.17343146006266277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,3584,0.24951680501302084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,3584,0.15565013885498047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,12288,0.6262986501057942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,16384,0.7946421305338542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,3072,0.22953707377115884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,10240,0.5338069279988606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,3072,0.14167680740356445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,2560,0.11602133115132648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,2560,0.1863925298055013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,2048,0.14961279233296712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,7168,0.34818239212036134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,2048,0.10007039705912273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,1536,0.11716586748758953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,6144,0.3110549290974935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,1536,0.08336533705393473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,1024,0.08260373274485269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,5120,0.2555253346761068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,1024,0.06943253676096597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,768,0.06712213357289633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,768,0.061187199751536046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,4096,0.20243733723958335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,768,0.06420799891153971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,512,0.055988268057505286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,512,0.05221759875615438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,256,0.03927146593729655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,256,0.04395413398742676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,256,0.04391680161158244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,3584,0.17878613471984864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,128,0.030770132939020794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,128,0.04123626550038655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,3072,0.1551029364267985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,64,0.025396267573038738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,65536,32,0.02646399935086568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,2560,0.135641606648763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,64,0.04003626505533854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,65536,32,0.04241280158360799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,2048,0.11220160325368245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,16384,0.27572479248046877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,1536,0.09004267056783041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,16384,0.1732042630513509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,16384,0.19705920219421386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,1024,0.06942613124847412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,65536,0.6544415791829427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,12288,0.2092202663421631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,12288,0.1372320016225179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,65536,1.1583946228027344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,512,0.051345066229502356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,10240,0.17476906776428222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,10240,0.11834452946980793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,8192,0.1422826608022054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,8192,0.09659946759541829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,7168,0.12537919680277507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,7168,0.08639679749806722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,6144,0.1066912015279134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,65536,128,0.03988053401311238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,6144,0.07526826858520508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,5120,0.08970773220062256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,5120,0.06539946794509888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,65536,0.8114421208699545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,12288,0.1546677271525065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,4096,0.0877621332804362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,4096,0.05538026491800944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,10240,0.13299946784973143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,3584,0.07714453538258871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,8192,0.09824533462524414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,3584,0.05114453236262003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,3072,0.06755626996358235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,3072,0.046300800641377766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,2560,0.05792106787363688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,7168,0.08926293055216471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,2560,0.04157013495763143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,2048,0.03928106625874837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,6144,0.08016106287638346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,2048,0.03682773510615031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,2048,0.03247039914131165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,1536,0.030461867650349934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,1536,0.0321941335995992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,1024,0.021794132391611733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,1024,0.027525333563486735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,1024,0.021574399868647256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,768,0.01759786605834961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,5120,0.06568533182144165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,768,0.02516053318977356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,512,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,512,0.022934399048487344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,256,0.009973333279291789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,256,0.020572799444198608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,256,0.014233600099881491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,128,0.007579733431339264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,128,0.018869332472483315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,4096,0.05283840099970499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,64,0.006558933357397716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,3584,0.04751253525416056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,3072,0.0421834667523702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,64,0.019197867314020792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,16384,32,0.006963199873765309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,16384,32,0.019552000363667808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,2560,0.0367082675298055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,16384,0.19919999440511066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,1536,0.027501867214838667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,16384,0.1350144068400065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,768,0.019245866934458414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,65536,0.4981386820475261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,512,0.01643946667512258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,65536,0.7782453536987305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,12288,0.15117866198221844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,12288,0.10389760335286458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,10240,0.12682987054189046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,8192,0.10153066317240397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,10240,0.10757866700490315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,8192,0.07511253356933593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,7168,0.09214186668395996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,16384,128,0.012939733266830445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,7168,0.06811520258585611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,6144,0.07747306823730468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,6144,0.060634664694468175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,5120,0.06593173344930013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,5120,0.05306773185729981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,12288,0.12716053326924642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,4096,0.052697598934173584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,4096,0.045310934384663895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,6144,0.06394559939702352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,16384,0.15976319313049317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,3584,0.04736640055974324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,3584,0.04233920176823934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,65536,0.722708257039388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,3072,0.041943466663360594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,3072,0.04152959982554118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,2560,0.0356170654296875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,2560,0.0353493332862854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,10240,0.10145493348439534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,2048,0.02916160027186076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,2048,0.03152746756871541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,7168,0.07270932992299398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,1536,0.023989333709081014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,8192,0.08358933130900065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,1536,0.027990400791168213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,1024,0.0171509325504303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,1024,0.02476693391799927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,5120,0.05401173432668051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,768,0.014402133226394654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,768,0.023481599489847817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,3072,0.03472746610641479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,4096,0.0435914675394694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,512,0.011372799674669903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,512,0.021118932962417604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,1536,0.021178666750590006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,256,0.008504533767700195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,256,0.0189194659392039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,2560,0.03110506733258565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,128,0.00660159985224406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,128,0.0180074671904246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,3584,0.03937919934590657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,64,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,64,0.01806186636288961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,12288,32,0.006097066899140676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,12288,32,0.01848640044530233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,768,0.015101866920789084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,2048,0.02645866672197978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,1024,0.017896533012390137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,16384,0.17344427108764648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,65536,0.45151573816935225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,65536,0.7064426422119141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,16384,0.12142293453216553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,12288,0.14766292572021483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,12288,0.09581973552703857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,10240,0.10981439749399821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,12288,0.1006282647450765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,10240,0.08335786660512289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,8192,0.09007679621378581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,8192,0.06952106952667236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,7168,0.08016533056894938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,7168,0.06329813400904337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,256,0.0108106662829717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,16384,0.13469546635945637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,6144,0.06850132942199708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,128,0.009860266248385112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,6144,0.056309334437052404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,65536,0.522268803914388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,12288,512,0.01306773324807485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,6144,0.052693335215250645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,5120,0.05726079940795899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,5120,0.04967999855677287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,5120,0.0473034660021464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,4096,0.04680639902750651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,4096,0.042504533131917314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,3584,0.04250880082448323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,10240,0.0855626662572225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,3584,0.04052799940109253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,8192,0.07052906354268393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,3072,0.037300264835357665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,3072,0.03802666664123535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,2560,0.03207040031750997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,2560,0.026605866352717084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,2560,0.03325013319651286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,2048,0.02656000057856242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,2048,0.02938773234685262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,1536,0.021630932887395225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,1536,0.026760532458623247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,7168,0.062554665406545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,1024,0.01616106629371643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,1024,0.024069333076477052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,768,0.013112533092498779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,768,0.022153600056966146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,512,0.010687999924023946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,512,0.020347734292348228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,4096,0.037614933649698895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,256,0.007587199906508128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,3584,0.03426346778869629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,256,0.018270933628082277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,256,0.009873066345850627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,128,0.006196266909440359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,3072,0.029330132404963176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,128,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,64,0.005644799768924713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,64,0.01746666630109151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,10240,32,0.0057322666049003605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,2048,0.02341653307278951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,10240,32,0.017496534188588462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,1536,0.01908373236656189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,1024,0.01618773341178894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,768,0.013705600301424661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,16384,0.1376138687133789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,16384,0.10213653246561687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,12288,0.1045258680979411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,512,0.011917866269747416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,65536,0.5481728235880534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,12288,0.08034453392028809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,65536,0.37221333185831706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,10240,0.08789013226826986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,10240,0.076583464940389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,8192,0.07148160139719645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,8192,0.05844480196634928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,7168,0.06204373439153036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,7168,0.05356800158818563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,6144,0.054223998387654626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,6144,0.047866666316986085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,10240,128,0.009113599856694538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,5120,0.04504213333129883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,5120,0.042743468284606935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,12288,0.083188263575236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,4096,0.03708266814549764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,4096,0.037113598982493085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,6144,0.04418239990870158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,16384,0.12218986352284748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,3584,0.033217066526412965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,3584,0.03405333360036214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,10240,0.07087360223134359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,3072,0.02943466703097026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,65536,0.4752223968505859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,3072,0.03144213358561198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,7168,0.05020586649576823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,2560,0.025361067056655882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,2560,0.02956693371136983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,2048,0.020550400018692017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,2048,0.02648319999376933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,8192,0.05687146584192911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,1536,0.01625279982884725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,1536,0.02396906614303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,1024,0.012377599875132244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,1024,0.021964800357818604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,5120,0.03750079870223999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,768,0.010506666700045268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,768,0.020317866404851278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,3072,0.025910399357477826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,512,0.008553600311279297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,512,0.018713599443435668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,4096,0.03151359955469767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,256,0.00631466656923294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,256,0.01752426624298096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,1536,0.016293332974116007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,128,0.005342933535575867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,2560,0.022721066077550253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,128,0.016833066940307617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,64,0.004829866687456766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,64,0.01688106656074524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,8192,32,0.005039999882380167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,3584,0.028252800305684406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,8192,32,0.0171615997950236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,2048,0.019912532965342202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,65536,0.47582079569498703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,65536,0.3408778508504232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,16384,0.13259306748708088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,16384,0.09307733376820883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,12288,0.09127893447875976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,12288,0.07362986405690511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,768,0.011978666981061299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,10240,0.07755093574523926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,10240,0.06475520133972168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,1024,0.013969066739082336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,8192,0.06255573431650797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,8192,0.05495040019353231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,256,0.008781866232554118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,7168,0.05581653515497843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,128,0.00809386670589447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,7168,0.05045226812362671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,8192,512,0.00993173321088155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,6144,0.048254934946695964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,6144,0.04554346799850464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,5120,0.040609065691630045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,5120,0.04078293244043986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,12288,0.0777728001276652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,4096,0.03364906708399455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,4096,0.03557866811752319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,16384,0.10389866828918456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,3584,0.030242133140563964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,65536,0.43067839940388997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,3584,0.0327455997467041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,3072,0.026742400725682576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,3072,0.031113600730895995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,10240,0.06895893414815267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,2560,0.023181867599487305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,2560,0.027666133642196656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,6144,0.04165759881337484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,2048,0.01917333404223124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,2048,0.0254528005917867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,8192,0.05531733433405558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,1536,0.015310933192571005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,1536,0.023461333910624185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,7168,0.048469332853953044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,1024,0.012060800194740295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,1024,0.02133013407389323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,5120,0.036305065949757895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,768,0.010471466183662414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,768,0.020562134186426797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,3072,0.02394346594810486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,512,0.008777599533398945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,512,0.019338667392730713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,4096,0.02994133234024048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,256,0.006488533318042755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,256,0.017842133839925133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,2560,0.022011733055114745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,3584,0.028065067529678345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,128,0.005736533304055532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,128,0.01735573410987854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,64,0.00528106689453125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,64,0.017398399114608765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,7168,32,0.005494399865468343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,7168,32,0.01758079926172892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,1536,0.015898666779200234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,65536,0.4079903920491536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,65536,0.3194613456726074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,16384,0.10398506323496501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,1024,0.013524267077445983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,2048,0.01911146640777588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,16384,0.0849514643351237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,12288,0.08079040050506592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,12288,0.0683690627415975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,768,0.011556266744931539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,10240,0.06825493176778158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,10240,0.06167893409729004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,256,0.0084906667470932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,8192,0.05286080042521159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,512,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,8192,0.050774399439493814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,7168,128,0.007925333579381307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,7168,0.04676160017649333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,7168,0.046565334002176925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,6144,0.0406272013982137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,6144,0.04290133317311605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,5120,0.039394132296244305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,5120,0.03823893467585246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,65536,0.424940808614095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,4096,0.03067306677500407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,4096,0.03504426479339599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,12288,0.07209493319193522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,3584,0.026070400079091387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,3584,0.03150720000267029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,16384,0.0938645362854004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,3072,0.022793600956598915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,3072,0.030004266897837324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,10240,0.06088639895121256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,2560,0.019272534052530925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,2560,0.026313600937525432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,6144,0.038427734375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,2048,0.015921066204706825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,2048,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,7168,0.04277439912160237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,1536,0.013175466656684875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,1536,0.022708266973495483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,8192,0.049184000492095946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,1024,0.010406399766604107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,1024,0.020808533827463786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,5120,0.03300799926122029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,768,0.008776533603668212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,768,0.01899519960085551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,3072,0.021589332818984987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,512,0.007231999933719635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,512,0.018037333091100057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,4096,0.027050666014353436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,256,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,256,0.017132800817489625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,3584,0.02505706747372945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,128,0.004762666424115499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,128,0.016184533635775246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,2560,0.020138667027155558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,64,0.004248533149560293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,64,0.016217600305875143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,6144,32,0.004537599782148996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,1536,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,6144,32,0.0164490669965744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,65536,0.3538698514302572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,65536,0.2775989214579264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,16384,0.08909866809844971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,16384,0.07871893246968588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,1024,0.012650666634241739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,12288,0.06792746384938558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,2048,0.017498666048049928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,12288,0.06341226498285929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,10240,0.057254401842753086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,10240,0.05442026853561401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,768,0.010554666320482891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,8192,0.0468394676844279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,8192,0.04720640182495117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,256,0.008096000055472057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,7168,0.041493332386016844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,512,0.009106133381525676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,7168,0.043722665309906004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,6144,0.03620586792627971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,6144,0.03861440022786458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,65536,0.2768202781677246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,6144,128,0.00751146674156189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,16384,0.07201386292775472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,5120,0.031032532453536987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,4096,0.025521065791447955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,5120,0.03430080016454061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,4096,0.029469867547353108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,3584,0.023044266303380332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,12288,0.05418773492177328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,3584,0.02039466698964437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,3584,0.02781333327293396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,3072,0.01995413303375244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,3072,0.026510934034983318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,2560,0.016898133357365928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,10240,0.046239999930063884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,2560,0.024872533480326333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,2048,0.014295466740926108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,2048,0.022909865776697794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,8192,0.03886293172836304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,1536,0.01169706682364146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,1536,0.021357866128285725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,7168,0.03415466547012329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,1024,0.009326933821042379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,6144,0.029730133215586346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,1024,0.019719467560450236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,1024,0.010406399766604107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,768,0.008066133161385854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,5120,0.02645866672197978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,768,0.01874133348464966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,512,0.00660693347454071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,4096,0.02222933371861776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,512,0.017318399747212727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,256,0.005197866757710775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,256,0.01664959987004598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,3072,0.018244266510009766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,128,0.004404266675313314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,2560,0.016356266538302102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,128,0.01628266672293345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,128,0.006733866532643636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,64,0.004243200023969015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,5120,32,0.004279466470082601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,64,0.01597866714000702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,2048,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,5120,32,0.016264533003171287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,65536,0.2767221450805664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,65536,0.24863893191019693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,65536,0.2669706662495931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,1536,0.012414933244387309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,16384,0.08692693710327148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,16384,0.07752319971720377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,12288,0.07549760341644288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,12288,0.06500266790390015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,10240,0.06663999954859415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,10240,0.05677973429361979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,8192,0.042991999785105386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,8192,0.04251413345336914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,7168,0.04514666795730591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,7168,0.03906666835149129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,768,0.008851200342178345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,6144,0.039686401685078934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,512,0.007986133297284443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,6144,0.034986666838328045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,5120,0.03431573311487834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,5120,256,0.007188266515731812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,5120,0.03078293402989705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,4096,0.0206496000289917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,4096,0.027155200640360515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,16384,0.060893865426381436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,3584,0.01904426614443461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,12288,0.046171732743581134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,3584,0.02595199942588806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,3072,0.016942934195200602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,10240,0.04139519929885864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,3072,0.02458239992459615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,2560,0.014221866925557455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,2560,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,8192,0.03227519989013672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,2048,0.012503467003504434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,2048,0.021565866470336915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,2048,0.013017599781354269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,1536,0.010054399569829304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,1536,0.020177066326141357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,7168,0.028468267122904463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,1024,0.007940266529719036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,6144,0.025471999247868853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,1024,0.018245333433151247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,768,0.006920533378918965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,768,0.01728746692339579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,5120,0.022338134050369263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,512,0.005423999826113383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,4096,0.019040000438690186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,512,0.016775466998418174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,256,0.004487466812133789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,256,0.01625599960486094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,3584,0.017349332571029663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,128,0.004006399959325791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,3072,0.01572160025437673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,128,0.015864533185958863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,64,0.003702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,64,0.01575146714846293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,2560,0.01420906682809194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,4096,32,0.003923200070858002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,4096,32,0.015821866194407144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,65536,0.24541014035542807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,65536,0.2331157366434733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,1536,0.010924800237019857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,16384,0.06895146369934083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,16384,0.06719573338826498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,1024,0.008851200342178345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,12288,0.053223466873168944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,12288,0.04384640057881673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,12288,0.05465279817581177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,768,0.007959466675917308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,10240,0.04401386578877767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,10240,0.048103467623392744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,8192,0.03562026818593343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,8192,0.0414250651995341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,512,0.0071370666225751235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,7168,0.03208640019098918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,7168,0.037110400199890134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,256,0.006446933249632518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,6144,0.02802026669184367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,6144,0.03355093399683635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,4096,128,0.00619946668545405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,5120,0.024221867322921753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,5120,0.02956693371136983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,16384,0.05716053247451782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,65536,0.22585280736287436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,4096,0.02092693249384562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,4096,0.027144533395767213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,3584,0.01922453244527181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,3584,0.02576106588045756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,10240,0.037351465225219725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,3072,0.01581333378950755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,8192,0.031461334228515624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,3072,0.024346667528152465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,2560,0.013831466436386108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,7168,0.027802666028340656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,2560,0.02317333420117696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,6144,0.024526933828989662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,2048,0.012150399883588155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,2048,0.02139413356781006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,1536,0.011274666587511698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,1536,0.010356266299883525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,1536,0.020153599977493285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,1024,0.008739200234413148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,1024,0.018576000134150186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,768,0.0077002664407094315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,768,0.01746666630109151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,5120,0.02213653326034546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,512,0.005806933343410492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,512,0.016838399569193523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,4096,0.018549333016077675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,256,0.0046634669105211895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,256,0.01614400049050649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,3072,0.015321600437164306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,128,0.004109866668780645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,128,0.015528532862663268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,2560,0.014094932874043783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,64,0.003789866715669632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,64,0.015941333770751954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,2048,0.012750933567682901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3584,32,0.003988266736268997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3584,32,0.015957333644231162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,1024,0.008662399649620057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,65536,0.21348586082458496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,65536,0.2185919920603434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,3584,0.017257599035898845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,16384,0.06054506699244181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,12288,0.0459935983022054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,16384,0.06261546611785888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,12288,0.05103466510772705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,10240,0.04101119836171468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,10240,0.04456213315327962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,10240,0.035104000568389894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,8192,0.031140265862147014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,512,0.007196799914042156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,8192,0.037804798285166426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,7168,0.02979733347892761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,7168,0.03430080016454061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,768,0.007885866860548655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,6144,0.024996266762415568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,256,0.006557866434256236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,6144,0.03129173318545024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,5120,0.021411200364430748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,5120,0.028358399868011475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3584,128,0.006101333101590474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,4096,0.01830186645189921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,4096,0.025384533405303954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,65536,0.2238976001739502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,16384,0.05287466843922933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,3584,0.01591146687666575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,3584,0.024473599592844644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,12288,0.04018239974975586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,3072,0.01346986691157023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,3072,0.023843199014663696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,2560,0.012694399555524191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,2560,0.022180267175038657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,8192,0.028845866521199543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,2048,0.01015786627928416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,2048,0.02054826617240906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,7168,0.025865600506464644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,1536,0.009514666597048442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,1536,0.020083200931549073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,6144,0.023291732867558798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,1024,0.006934399902820587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,1024,0.018093866109848023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,5120,0.020155733823776244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,768,0.0058794667323430385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,768,0.017030400037765504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,4096,0.01698026657104492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,512,0.005162666738033295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,512,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,3584,0.015970133741696677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,3072,0.014411733547846476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,256,0.004203733305136363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,256,0.015980799992879234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,128,0.0037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,2560,0.012898133198420206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,128,0.015848533312479655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,64,0.0035391998787721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,2048,0.01181013286113739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,64,0.015731199582417806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,3072,32,0.0037205333511034647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,3072,32,0.015427199999491372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,1536,0.010161067048708599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,65536,0.18354454040527343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,65536,0.20386452674865724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,1024,0.008037333190441132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,16384,0.05422613223393759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,16384,0.059429331620534265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,12288,0.039740800857543945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,12288,0.048000001907348634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,768,0.007485866546630859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,10240,0.033046400547027587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,10240,0.041756800810496014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,512,0.006790400048096975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,8192,0.02684906721115112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,8192,0.03505280017852783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,256,0.0060917332768440245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,7168,0.02436479926109314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,3072,128,0.005861333509286245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,7168,0.03211200038592021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,6144,0.021869866053263347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,6144,0.029190399249394733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,5120,0.018747733036677042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,5120,0.02648319999376933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,16384,0.04690560102462769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,65536,0.21602986653645834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,4096,0.014403200149536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,4096,0.024050132433573405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,3584,0.012810666362444559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,12288,0.04129813512166341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,3584,0.02355946699778239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,3072,0.012483200430870056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,3072,0.022050132354100548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,10240,0.03424959977467855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,2560,0.0102101335922877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,2560,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,8192,0.027988266944885255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,2048,0.009507200121879578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,2048,0.019539199272791543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,7168,0.02355413238207499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,1536,0.008045866588751475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,6144,0.022030933698018392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,1536,0.018769067525863648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,1024,0.006265600025653839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,1024,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,5120,0.019129600127538046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,768,0.005422933399677277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,768,0.016588800152142844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,4096,0.016084266702334087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,512,0.004659200211366018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,512,0.01602240006128947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,3072,0.013517866532007853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,256,0.003973333289225897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,256,0.016214399536450704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,2048,0.01076800028483073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,128,0.003602133442958196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,128,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,3584,0.014986667037010192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,64,0.003472000112136205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,64,0.015318399667739869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,1024,0.007677866518497467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2560,32,0.0035616000493367515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2560,32,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,65536,0.1435424009958903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,65536,0.17992746035257975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,768,0.007351466516653697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,16384,0.04493333498636882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,2560,0.01241386632124583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,16384,0.06432960033416749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,12288,0.048135467370351154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,12288,0.05050133466720581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,10240,0.0421120007832845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,10240,0.040405333042144775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,1536,0.009076266487439474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,8192,0.03192639946937561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,8192,0.03349226713180542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,256,0.0061482667922973635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,7168,0.028908799091974895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,512,0.006728533407052357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,7168,0.03033706744511922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,6144,0.025150932868321735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,6144,0.02852906584739685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2560,128,0.005913599828879039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,5120,0.019966934124628702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,5120,0.026078933477401735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,65536,0.1950954596201579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,12288,0.03531306584676107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,4096,0.014314666390419006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,4096,0.023772799968719484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,16384,0.04574506680170695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,3584,0.013032533725102744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,3584,0.02283946673075358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,3072,0.012090667088826498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,3072,0.021205333868662517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,10240,0.03004693388938904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,2560,0.010644267002741497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,2560,0.019900800784428914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,8192,0.02487679918607076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,2048,0.008605866630872091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,2048,0.019211733341217042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,6144,0.019694934288660683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,7168,0.021886932849884033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,1536,0.007423999905586243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,1536,0.018062933286031087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,5120,0.017459199825922648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,1024,0.005784533421198527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,1024,0.016910932461420693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,768,0.004906666775544485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,768,0.016657066345214844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,3072,0.012300800283749897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,512,0.004331733286380768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,512,0.015873066584269204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,4096,0.01483626663684845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,256,0.003774933268626531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,256,0.01594239970048269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,3584,0.013594667116800943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,128,0.0034058667719364167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,128,0.015068800250689188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,2560,0.010785067081451416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,64,0.0033312000334262846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,64,0.015214932958285013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,2048,32,0.0034314667185147605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,2048,32,0.015235199530919393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,65536,0.1309600035349528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,65536,0.17108267148335773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,1536,0.008317866424719492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,16384,0.0361792008082072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,16384,0.0536138653755188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,2048,0.009623466928799947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,12288,0.030270934104919434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,12288,0.041289599736531575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,768,0.006594133377075195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,10240,0.026723200082778932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,1024,0.007108266651630402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,10240,0.03592746655146281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,8192,0.02089280088742574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,8192,0.03112000028292338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,256,0.005544533332188925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,512,0.006122666597366333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,7168,0.019432532787322997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,7168,0.028171734015146895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,6144,0.016680532693862916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,6144,0.026469333966573076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,2048,128,0.005196799834569296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,5120,0.014428800344467163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,5120,0.026231465737024943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,65536,0.1676746686299642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,12288,0.03294826745986938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,4096,0.01162453293800354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,4096,0.024401066700617473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,3584,0.011086933811505635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,3584,0.022910932699839272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,16384,0.04378879865010579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,3072,0.009867733716964722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,3072,0.021202133099238078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,10240,0.028075732787450153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,2560,0.010354133447011311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,2560,0.019723733266194664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,6144,0.01875413258870443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,2048,0.008762666583061218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,7168,0.02142613331476847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,8192,0.02358506719271342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,2048,0.019361066818237304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,1536,0.007111466427644093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,1536,0.017813332875569663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,1024,0.005432533224423727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,1024,0.016871466239293417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,5120,0.016820265849431356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,768,0.004826666911443075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,768,0.01628266672293345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,3072,0.011334400375684102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,512,0.004287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,512,0.01585919956366221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,4096,0.014124799768129984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,256,0.0037280000746250153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,256,0.015382400155067444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,3584,0.01288640002409617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,128,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,128,0.015015467007954916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,1536,0.008205866813659668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,2560,0.010264533758163451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,64,0.003190399954716364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,64,0.014667733510335287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1536,32,0.0031093334158261614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1536,32,0.014948266744613647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,65536,0.10029013156890869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,65536,0.1519221305847168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,1024,0.007102933526039123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,16384,0.02900693416595459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,2048,0.009471999605496724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,16384,0.04757333199183146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,12288,0.02723520000775655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,12288,0.03689599831899007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,10240,0.024120533466339113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,10240,0.032252800464630124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,768,0.006520533561706543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,8192,0.016785067319869996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,8192,0.028889600435892743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,256,0.0055744002262751256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,7168,0.015084800124168397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,128,0.005238399902979533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1536,512,0.0060928001999855045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,7168,0.0265610675017039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,6144,0.013480533162752786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,6144,0.02495253284772237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,12288,0.031137067079544067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,5120,0.012292266885439555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,5120,0.02395306626955668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,65536,0.15528106689453125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,16384,0.04048853317896525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,4096,0.010232533017794292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,4096,0.021396267414093017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,3584,0.009573333462079366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,3584,0.020963199933369956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,3072,0.008732799688975017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,3072,0.019832533597946168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,10240,0.026428800821304322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,2560,0.008381866415341695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,2560,0.01967359979947408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,8192,0.022139734029769896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,2048,0.006649599969387054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,2048,0.018441599607467652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,6144,0.017685333887736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,1536,0.005871999760468801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,1536,0.017663999398549398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,7168,0.01981546680132548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,1024,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,1024,0.0168938676516215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,3072,0.010673066973686219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,768,0.004211199780305227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,4096,0.01286186675230662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,768,0.01691733400026957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,512,0.0038890667259693147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,512,0.01572053333123525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,5120,0.015921066204706825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,256,0.0034527999659379324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,3584,0.011710932850837708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,256,0.01567359964052836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,128,0.0031914666295051576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,2560,0.009715200463930766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,128,0.015093333522478738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,64,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,64,0.01525759994983673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,1024,32,0.00314026673634847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,1024,32,0.015331199765205384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,1536,0.007658666869004567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,65536,0.06657706499099732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,65536,0.13989866574605306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,2048,0.008914132912953694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,16384,0.02434026598930359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,16384,0.04325439929962158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,12288,0.020155733823776244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,1024,0.0068234667181968685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,12288,0.03540586630503337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,10240,0.017362133661905924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,10240,0.03163520097732544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,65536,0.14948479334513348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,8192,0.015134933590888976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,16384,0.03991786638895671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,8192,0.028203733762105304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,7168,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,7168,0.026234666506449383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,12288,0.030033065875371294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,512,0.006007466713587443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,128,0.0052149335543314615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,6144,0.013237333297729493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,768,0.006268799801667531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,6144,0.017513600985209148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,6144,0.0234389324982961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,5120,0.011737599968910217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,1024,256,0.005351466437180838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,5120,0.021910399198532104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,4096,0.009802666306495667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,10240,0.025663999716440837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,4096,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,3584,0.009152000149091084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,3584,0.019975467522939046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,3072,0.00823466678460439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,3072,0.020013866821924846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,2560,0.007277866701285045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,2560,0.018605866034825645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,2560,0.009516800443331402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,2048,0.006354133288065593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,2048,0.01773759921391805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,1536,0.005705599983533224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,1536,0.017515732844670614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,8192,0.02162453333536784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,1024,0.004700799783070883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,1024,0.016448000073432924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,7168,0.019193599621454872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,768,0.004212266703446706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,768,0.016593066851298015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,768,0.006051200131575266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,5120,0.015378133455912272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,512,0.003702399879693985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,4096,0.01255466639995575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,512,0.015449600418408713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,256,0.00340693344672521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,3584,0.011662933230400085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,256,0.015057067076365152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,3072,0.010269866387049357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,128,0.0031466667850812277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,128,0.015068800250689188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,64,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,2048,0.00878613293170929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,64,0.01479680041472117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,768,32,0.002994133283694585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,768,32,0.014839466412862143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,1536,0.007632000247637431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,65536,0.052627201875050864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,16384,0.016657066345214844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,65536,0.13030186494191487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,16384,0.03788373470306396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,12288,0.015287466843922935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,12288,0.03371200164159139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,1024,0.0066453332702318835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,10240,0.013766400019327798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,10240,0.030041599273681642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,512,0.005724800129731497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,8192,0.012006400028864543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,256,0.005127466718355815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,8192,0.026778666178385417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,7168,0.011268267035484314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,768,128,0.004924799998601278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,7168,0.024691200256347655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,6144,0.011978666981061299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,6144,0.023579732577006022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,5120,0.010726400216420491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,65536,0.12904746532440187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,5120,0.02220906615257263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,4096,0.009035733342170716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,16384,0.0338154673576355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,4096,0.020950400829315187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,4096,0.011315199732780456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,3584,0.00803413341442744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,3584,0.020217599471410115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,3072,0.0071936001380284624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,12288,0.026415999730428057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,3072,0.01940586765607198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,2560,0.007484800120194752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,2560,0.01948480010032654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,10240,0.022615466515223184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,2048,0.00658240020275116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,2048,0.01799466609954834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,8192,0.018927999337514243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,1536,0.005634133517742157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,1536,0.016922666629155477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,7168,0.016685867309570314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,6144,0.015099733074506124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,1024,0.004677333434422811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,1024,0.016218666235605875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,768,0.004299733539422353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,5120,0.013338667154312134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,768,0.015890133380889893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,512,0.0037450666228930154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,512,0.015299200018246969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,3584,0.010993066430091857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,256,0.0033173332611719764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,3072,0.009843200445175171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,256,0.014806399742762247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,128,0.0030229332546393077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,128,0.014890666802724203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,2560,0.009038933118184407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,64,0.0028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,64,0.014817066987355552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,512,32,0.002906666696071625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,512,32,0.014726400375366211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,65536,0.037062398592631024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,65536,0.12238826751708984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,2048,0.008146133522192638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,16384,0.01088853379090627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,16384,0.03362346490224202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,1536,0.007526400188604991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,12288,0.009520000219345093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,1024,0.006284800171852112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,12288,0.02866026759147644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,10240,0.010692266623179118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,768,0.005909333129723867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,10240,0.026245333751042682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,512,0.005493333439032236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,8192,0.009113599856694538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,8192,0.018075732390085857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,8192,0.02363199989000956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,7168,0.008782933155695598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,256,0.005103999873002371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,7168,0.02446826696395874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,6144,0.008088533580303193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,6144,0.023449599742889404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,5120,0.007901866734027863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,512,128,0.004851200183232625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,5120,0.02116480072339376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,4096,0.007214933137098948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,4096,0.01995413303375244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,65536,0.12544746398925782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,3584,0.007670400043328603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,3584,0.01962666710217794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,16384,0.03302186727523804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,3072,0.006951466699441274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,3072,0.018862932920455933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,3072,0.00965013305346171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,12288,0.025512532393137617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,2560,0.006279466549555461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,10240,0.021590399742126464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,2560,0.018481065829594932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,2560,0.008982400099436443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,2048,0.00553173323472341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,2048,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,7168,0.016196266810099284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,1536,0.004884266853332519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,6144,0.014872533082962037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,1536,0.01700586676597595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,1024,0.004149333387613296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,1024,0.01627413332462311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,768,0.0038783999780813852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,5120,0.013150933384895324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,768,0.016104533274968465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,512,0.0035264000296592714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,4096,0.011330133676528931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,512,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,256,0.003172266731659571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,256,0.015032533804575601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,3584,0.010889599720637005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,128,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,128,0.014535466829935709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,2048,0.008097066481908163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,64,0.0027285332481066385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,64,0.014684800306955972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,1536,0.007402666906515758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,256,32,0.0027776000400384264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,1024,0.006131199995676676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,256,32,0.014653866489728292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,65536,0.029228800535202028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,16384,0.009475200374921163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,65536,0.1165728012720744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,768,0.005931733548641205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,16384,0.03156053423881531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,12288,0.00805866668621699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,512,0.0054293334484100345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,12288,0.027159466346104937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,10240,0.008906666437784832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,10240,0.02528746724128723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,8192,0.008051200211048127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,256,0.004947199920813243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,8192,0.02316373387972514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,7168,0.007627733548482259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,7168,0.023062400023142495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,6144,0.007095466554164887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,6144,0.021937066316604616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,256,128,0.004834133386611939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,5120,0.006678399940331777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,5120,0.021191465854644775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,65536,0.12095893224080403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,4096,0.006275199850400289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,16384,0.032333866755167646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,4096,0.020056533813476562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,4096,0.01112320025761922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,12288,0.02510720094045003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,3584,0.006669866542021434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,3584,0.019323732455571493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,3072,0.006277333199977875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,10240,0.021793067455291748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,3072,0.019336533546447755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,2560,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,8192,0.018093866109848023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,2560,0.01844373345375061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,2560,0.008861866593360902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,2048,0.005475200215975444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,2048,0.017422932386398315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,7168,0.016330666343371072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,1536,0.004814933240413666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,1536,0.01662720044453939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,6144,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,1024,0.0041696002086003625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,1024,0.016127999623616537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,768,0.0038272000849246977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,768,0.015662933389345803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,512,0.003519999980926514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,512,0.01509119967619578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,5120,0.013044266899426778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,256,0.0031615999837716425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,256,0.015201066931088766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,3584,0.010707199573516846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,128,0.0029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,3072,0.009514666597048442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,128,0.015057067076365152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,128,0.004794666667779287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,64,0.002834133307139079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,128,32,0.0028586665789286296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,64,0.014436266819636025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,2048,0.008028799792130788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,65536,0.025820799668629962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,128,32,0.014419200023015341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,1024,0.006233599781990051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,1536,0.007277866701285045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,16384,0.007829333345095318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,65536,0.11462186972300212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,12288,0.008992000420888265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,16384,0.030321067571640013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,12288,0.026424533128738402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,768,0.005786666770776113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,10240,0.008146133522192638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,10240,0.024947200218836466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,8192,0.007480533421039581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,512,0.0055071999629338585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,8192,0.023040000597635904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,7168,0.006516266862551372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,6144,0.006198399762312571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,7168,0.02267520030339559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,5120,0.006506666541099548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,6144,0.022796799739201866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,4096,0.006060799956321717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,5120,0.02174933354059855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,3584,0.006588799754778545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,4096,0.019769599040349327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,3584,0.01921280026435852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,3072,0.006211199859778086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,384,128,256,0.005062399804592133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,2560,0.006055466830730438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,3072,0.01909760038057963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,2048,0.005482666691144307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,2560,0.018367999792099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,2048,0.01737066706021627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,1536,0.004675200084845225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,1536,0.016429866353670754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,1024,0.004053333401679992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,1024,0.01597866714000702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,768,0.003730133424202601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,768,0.015759999553362526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,512,0.003401600072781245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,256,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,512,0.015434666474660238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,32,0.002713600049416224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,256,0.014732799927393594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,128,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,128,0.014694399634997048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,64,64,0.002661333233118057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,64,0.014470400412877402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,64,32,0.01446293294429779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,65536,0.024513065814971924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,16384,0.008078933258851369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,12288,0.007102933526039123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,65536,0.11325759887695312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,16384,0.029645866155624388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,10240,0.0068917334079742435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,12288,0.027009065945943194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,10240,0.024654932816823325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,8192,0.006513066589832306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,8192,0.022934399048487344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,7168,0.0060810665289560955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,6144,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,7168,0.022341332832972207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,5120,0.006361599763234456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,6144,0.021925334135691324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,3584,0.01914026737213135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,5120,0.02137493292490641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,4096,0.005952000121275584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,4096,0.01994880040486654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,3584,0.006495999793211619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,3072,0.0060597335298856105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,2560,0.006106666723887126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,3072,0.018569600582122803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,1536,0.01660693287849426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,2560,0.01815999945004781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,2048,0.005409066875775656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,1536,0.004713599880536398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,512,0.0033930666744709016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,2048,0.017142399152119955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,1024,0.004045866678158442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,1024,0.015822933117548624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,768,0.003732266773780187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,768,0.015917866428693136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,256,0.003031466652949651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,64,0.014380799730618796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,512,0.015273599823315939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,32,0.014517333110173544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,128,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,256,0.014757333199183145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,64,0.0027402666707833606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,384,32,128,0.01461013356844584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,384,32,32,0.0027327999472618104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,12288,0.3448896090189616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,16384,0.44142506917317703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,10240,0.29023892084757485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,12288,0.6200725555419921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,12288,0.4098474820454915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,10240,0.5132266680399578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,16384,0.8055626551310221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,8192,0.23833386103312174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,8192,0.4130666732788086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,10240,0.35099948247273766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,7168,0.3670720100402832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,7168,0.2219775994618734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,7168,0.25026559829711914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,6144,0.1913354714711507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,6144,0.32347946166992186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,5120,0.2686239878336588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,5120,0.16385173797607422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,4096,0.22107200622558593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,4096,0.13729599316914876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,3584,0.19623039563496908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,3584,0.12495253086090088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,3584,0.12800853252410888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,3072,0.1719231923421224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,3072,0.11084693272908527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,3072,0.10981760025024415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,2560,0.14584852854410807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,2560,0.096562131245931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,2048,0.11992106437683106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,2048,0.08144959608713785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,16384,0.5332832018534343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,1536,0.1061738650004069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,1536,0.06957120100657145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,8192,0.26962985992431643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,1024,0.07324693202972413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,6144,0.20867625872294107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,1024,0.05494293371836344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,768,0.05389226675033569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,5120,0.17439573605855305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,768,0.04777919848759969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,4096,0.14750933647155762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,512,0.04195839961369832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,512,0.04261973301569621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,256,0.029597866535186767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,256,0.03521813154220581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,128,0.022375466426213582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,128,0.030362667640050252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,128,0.028709334135055543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,64,0.016780799627304076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,64,0.029604266087214153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,65536,32,0.016755199432373045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,65536,32,0.03147413333257039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,2560,0.09409173329671225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,2048,0.08160639603932698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,65536,0.46882346471150715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,65536,0.7893386840820312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,16384,0.20425705909729003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,1536,0.06611200173695883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,16384,0.15180692672729493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,12288,0.15868479410807293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,16384,0.1334378719329834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,12288,0.10692266623179118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,1024,0.053761065006256104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,10240,0.1317471981048584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,10240,0.09222293694814046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,768,0.0471285343170166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,8192,0.07885119915008545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,8192,0.10800746281941731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,512,0.03715733289718628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,65536,256,0.031497599681218465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,7168,0.09515413443247477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,7168,0.07086079915364583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,6144,0.08295786380767822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,6144,0.06295359929402669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,6144,0.05575786828994751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,5120,0.0701749324798584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,5120,0.05508266687393189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,4096,0.05770026842753092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,4096,0.04897493521372477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,4096,0.03960640033086141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,3584,0.05162133375803629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,3584,0.04432320197423299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,3072,0.04573013385136922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,3072,0.0405781348546346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,65536,0.5471712112426758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,2560,0.04423679908116658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,2560,0.037931732336680096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,12288,0.10515519777933757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,10240,0.09113066991170247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,2560,0.02950506607691447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,2048,0.03636159896850586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,2048,0.03208960096041362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,1536,0.02645653287569682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,8192,0.07061013380686441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,1536,0.02774080038070679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,7168,0.06404053370157878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,1024,0.02037973403930664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,1024,0.02378773291905721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,1024,0.016522666811943053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,768,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,768,0.02239146629969279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,512,0.011077333490053813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,512,0.020898133516311646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,256,0.008461866776148479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,5120,0.04770239988962809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,256,0.017874133586883546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,128,0.006026666859785716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,128,0.017644800742467246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,128,0.01007466713587443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,3584,0.03633066813151042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,64,0.005496533215045929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,16384,32,0.005814399818579356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,64,0.017593600352605186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,16384,32,0.018101332585016887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,3072,0.03238826592763265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,65536,0.3721834818522135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,65536,0.5850720087687175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,16384,0.1531050682067871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,16384,0.1206175963083903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,16384,0.11279253164927165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,12288,0.11849386692047119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,12288,0.0876810630162557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,2048,0.025719465812047322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,1536,0.021154133478800456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,10240,0.09861439863840739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,10240,0.07409173647562663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,768,0.015145599842071533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,8192,0.08000106811523437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,512,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,8192,0.06254400014877319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,7168,0.07057920296986898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,7168,0.056567466259002684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,16384,256,0.0108106662829717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,6144,0.06204479932785034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,6144,0.05134613513946533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,5120,0.05279786586761474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,5120,0.045441067218780516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,65536,0.46930348078409834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,4096,0.05000853141148885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,4096,0.04269973436991374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,12288,0.08675733407338461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,3584,0.03921706676483154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,10240,0.07160960038503011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,3584,0.03772053321202596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,3072,0.034782934188842776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,3072,0.03464320103327433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,2560,0.030087467034657794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,8192,0.05764586528142294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,2560,0.030758400758107502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,2048,0.02507946689923604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,7168,0.05208106835683187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,2048,0.0272597332795461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,6144,0.04534506797790527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,1536,0.019924267133076986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,1536,0.024344533681869507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,1024,0.013481600085894265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,1024,0.021995733181635536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,5120,0.03882879813512166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,768,0.011286399761835734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,768,0.02090880076090495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,4096,0.03239679932594299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,512,0.009171199798583985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,512,0.01858560045560201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,3584,0.02924693425496419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,3072,0.026152533292770386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,256,0.006683733562628429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,256,0.01759679913520813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,256,0.00853653351465861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,128,0.005198933184146881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,128,0.01684479912122091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,128,0.007938133180141449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,2560,0.02395840088526408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,64,0.004657066861788432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,2048,0.020090667406717937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,12288,32,0.005121066669623057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,64,0.016901334126790367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,12288,32,0.017402666807174682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,1536,0.016410666704177856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,16384,0.13089706897735595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,16384,0.09629440307617188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,1024,0.014028799533843995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,12288,0.10127573013305664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,12288,0.07695573170979818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,65536,0.35509548187255857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,65536,0.5380320231119792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,10240,0.0843733310699463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,768,0.01202453374862671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,10240,0.06652799844741822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,8192,0.06894613107045491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,8192,0.056328535079956055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,7168,0.061006931463877354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,7168,0.05155520041783651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,6144,0.053756801287333164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,6144,0.046931199232737225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,12288,512,0.01057919959227244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,5120,0.045815467834472656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,5120,0.041672531763712564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,12288,0.07743146419525146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,4096,0.03808000087738037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,4096,0.03738986651102702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,16384,0.0997759977976481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,6144,0.04024746815363566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,3584,0.034408533573150636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,65536,0.4374176025390625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,3584,0.03452800114949544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,3072,0.03166933258374532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,3072,0.034677334626515705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,10240,0.07005546887715658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,2560,0.026606933275858564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,2560,0.02840106685956319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,7168,0.04675413370132446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,2048,0.021699200073877968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,2048,0.025459200143814087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,1536,0.017386666933695474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,1536,0.023412267367045082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,8192,0.052108800411224364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,1024,0.01250986655553182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,1024,0.021620267629623414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,5120,0.03513600031534831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,768,0.010838400324185688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,768,0.02062506675720215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,3072,0.023364265759785972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,512,0.0090037335952123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,4096,0.029226666688919066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,1536,0.014612266421318054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,512,0.01834026575088501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,256,0.006444799900054932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,256,0.017760000626246133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,128,0.005117866893609365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,128,0.016641066471735636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,2560,0.021386667092641195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,64,0.0047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,64,0.016827734311421712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,10240,32,0.004977066814899444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,10240,32,0.01696959932645162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,3584,0.026387200752894087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,1024,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,2048,0.01808533271153768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,65536,0.4196778615315755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,16384,0.10418879985809326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,65536,0.2728437423706055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,16384,0.09072106679280599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,12288,0.08154346942901611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,12288,0.06631146669387818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,12288,0.05644693374633789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,10240,0.06833279927571614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,10240,0.057601066430409756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,8192,0.05491093397140503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,8192,0.04896320104598999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,256,0.007587199906508128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,128,0.007175466914971669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,7168,0.049225600560506184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,7168,0.04488213459650676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,65536,0.28932905197143555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,7168,0.03577920198440552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,16384,0.07214293479919434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,6144,0.04375360012054443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,768,0.010797866185506185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,5120,0.03746773401896159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,6144,0.04235413471857707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,5120,0.03657706578572591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,10240,512,0.008906666437784832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,4096,0.03059946695963542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,4096,0.032807467381159465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,3584,0.027863466739654542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,10240,0.047601068019866945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,3584,0.02921813329060872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,3072,0.024794665972391765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,3072,0.027318400144577027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,2560,0.021208532651265464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,3072,0.018905599912007652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,8192,0.039044264952341715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,2560,0.025381332635879515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,2048,0.01732800006866455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,2048,0.023324799537658692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,1536,0.013329066832860312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,1536,0.021811199188232423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,1024,0.009938133756319682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,1024,0.0202346662680308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,6144,0.031240532795588177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,768,0.00856213370958964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,5120,0.027265065908432008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,4096,0.02318293253580729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,768,0.018729599316914876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,512,0.007257600128650665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,3584,0.021545600891113282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,512,0.01690346598625183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,256,0.005202133456865946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,256,0.01646613379319509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,256,0.007484800120194752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,2560,0.016839466492335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,128,0.0045738667249679565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,128,0.01616320013999939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,64,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,2048,0.015197867155075073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,64,0.016310399770736693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,8192,32,0.004369066655635833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,1536,0.012872533003489176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,8192,32,0.016862932840983072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,1024,0.011068800091743469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,16384,0.09106026490529379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,65536,0.2473130702972412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,65536,0.34480533599853513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,16384,0.07572906812032064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,65536,0.25572800636291504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,12288,0.0715936024983724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,12288,0.06828479766845703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,10240,0.06057279904683431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,10240,0.05249173243840536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,8192,0.04839786688486735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,8192,0.044793601830800375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,8192,0.03594453334808349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,7168,0.043281066417694095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,7168,0.041354668140411374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,7168,0.03263573249181111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,6144,0.0387168010075887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,6144,0.03771413167317708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,5120,0.033701332410176595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,5120,0.03477120002110799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,768,0.01013973355293274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,4096,0.027116799354553224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,512,0.008263466755549113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,4096,0.02969920039176941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,8192,128,0.007121066749095917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,4096,0.021228800217310585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,3584,0.02455893357594808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,3584,0.02775146762530009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,3072,0.02193173368771871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,3072,0.025206400950749712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,3072,0.01723840037981669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,2560,0.018768000602722167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,2560,0.023614933093388878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,16384,0.06638933420181274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,2048,0.015131733814875283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,2048,0.02225493391354879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,12288,0.050874666372934974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,1536,0.011925333738327026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,10240,0.04361813465754191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,1536,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,1536,0.01192639966805776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,1024,0.009274666508038838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,1024,0.01924906571706136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,768,0.008080000181992848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,768,0.01723840037981669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,6144,0.028381866216659547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,768,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,512,0.006551466882228851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,512,0.016742400328318276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,256,0.004833066463470459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,5120,0.02511253356933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,256,0.01644373337427775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,128,0.0042463997999827065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,128,0.015892266233762106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,64,0.003913599997758865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,3584,0.019180800517400107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,64,0.01585599978764852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,7168,32,0.004196266829967499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,7168,32,0.01619733373324076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,65536,0.31552213033040366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,65536,0.2352224032084147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,2560,0.015569067001342774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,16384,0.08052053451538085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,2048,0.01390720009803772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,16384,0.06867093245188395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,12288,0.0630079984664917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,12288,0.05555200179417928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,10240,0.05480959812800089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,10240,0.049464531739552814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,1024,0.010098133484522502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,8192,0.04283413489659627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,8192,0.041511468092600506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,512,0.007468800246715546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,8192,0.03251840074857076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,7168,0.03836479981740316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,256,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,7168,0.03886080185572306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,6144,0.03437013228734334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,6144,0.03518613179524739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,7168,128,0.006121600170930227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,5120,0.030007465680440264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,5120,0.031846400101979574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,4096,0.02399786710739136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,4096,0.02760426600774129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,16384,0.06005653142929077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,65536,0.23402560551961266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,3584,0.021670399109522502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,12288,0.04708480040232341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,3584,0.02683839996655782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,3072,0.019604265689849854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,3072,0.024752000967661537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,2560,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,2560,0.02331519921620687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,10240,0.03940693140029907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,2048,0.013154133160909017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,2048,0.02134933272997538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,7168,0.02996586759885152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,6144,0.025960532824198405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,1536,0.010788266857465107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,1536,0.020376533269882202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,1024,0.008411733309427898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,5120,0.022590933243433635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,1024,0.018373332420984902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,768,0.007377066711584728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,4096,0.019589332739512126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,768,0.017256534099578856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,768,0.007703466713428498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,512,0.005719466507434845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,512,0.01644480029741923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,512,0.007067733506361644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,256,0.004583466549714406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,256,0.016090666254361473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,128,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,128,0.015568000078201295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,128,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,64,0.0037098666032155357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,64,0.015783466895421348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,6144,32,0.0039434666434923805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,6144,32,0.016065067052841185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,65536,0.2849045435587565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,3584,0.017500799894332886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,3072,0.016080000003178916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,65536,0.2121386686960856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,65536,0.22088747024536132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,2560,0.014628266294797262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,16384,0.0688927968343099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,16384,0.06250666777292888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,12288,0.054972799619038906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,12288,0.05158400138219198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,2048,0.012876799702644348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,10240,0.049930667877197264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,10240,0.045899732907613115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,1536,0.010869333148002624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,8192,0.0367520014444987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,8192,0.03868800004323324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,8192,0.029362134138743085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,7168,0.03402239878972371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,7168,0.036166401704152425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,1024,0.009157333771387737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,6144,0.030011733373006184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,6144,0.03282559911410014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,5120,0.027999999125798543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,6144,256,0.006387199958165486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,5120,0.02922240098317464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,4096,0.021267199516296388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,4096,0.026833067337671917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,3584,0.01959786613782247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,16384,0.05471893151601156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,3584,0.024989867210388185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,3072,0.017812265952428182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,12288,0.04208853244781494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,3072,0.02376746733983358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,10240,0.036098134517669675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,2560,0.015000533064206442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,2560,0.022086399793624877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,2048,0.012083199620246888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,2048,0.020796799659729005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,7168,0.027005867163340254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,1536,0.010146133104960124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,1536,0.019640533129374187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,6144,0.023494400580724082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,1024,0.007720533510049183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,5120,0.02030613422393799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,1024,0.017029333114624023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,768,0.006903466582298279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,768,0.016990933815638223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,4096,0.017042134205500284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,512,0.005258666475613912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,512,0.01658986707528432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,3584,0.01572480003039042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,256,0.004388266801834106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,3072,0.014283733566602072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,256,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,128,0.003894400099913279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,2560,0.013476266463597616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,128,0.015595733126004537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,64,0.0035989334185918174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,2048,0.011797333757082622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,64,0.015617066621780395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,5120,32,0.003922133396069208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,5120,32,0.015848533312479655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,1536,0.009910399715105694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,65536,0.2231680075327555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,65536,0.18395946820576986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,1024,0.008126933375994365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,16384,0.06549013455708821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,16384,0.05771093368530274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,12288,0.052198398113250735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,12288,0.04848213195800781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,768,0.007186133166154225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,10240,0.04468693335851033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,10240,0.043815465768178304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,8192,0.03264213403065999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,8192,0.036423468589782716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,512,0.0065077334642410275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,7168,0.03150826692581177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,7168,0.03322239915529887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,256,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,6144,0.026935466130574542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,6144,0.031141332785288495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,5120,128,0.005719466507434845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,5120,0.023635200659434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,5120,0.02860586643218994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,16384,0.04920746485392253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,4096,0.019108267625172932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,65536,0.20377279917399088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,12288,0.03882666826248169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,4096,0.025060266256332397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,3584,0.01746986707051595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,3584,0.023899734020233154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,3072,0.014634666840235391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,10240,0.03464853366216024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,3072,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,2560,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,8192,0.026169600089391072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,2560,0.021673599878946938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,2048,0.011335466305414836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,2048,0.020345600446065266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,7168,0.02372693419456482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,1536,0.00925546685854594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,1536,0.01841813325881958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,6144,0.021384533246358237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,5120,0.01829013427098592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,1024,0.007401599983374278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,1024,0.017942400773366292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,768,0.006545066833496094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,768,0.016897066434224447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,4096,0.01588373382886251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,512,0.0046858668327331545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,3072,0.013697066903114319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,512,0.016074666380882265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,256,0.0039818666875362395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,256,0.015617066621780395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,2048,0.011125333110491435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,128,0.0035797332723935447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,128,0.015213867028554281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,3584,0.014545067151387533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,64,0.0034517332911491393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,64,0.01520746648311615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,4096,32,0.003676799933115641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,2560,0.012657066186269125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,4096,32,0.015609600146611533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,1024,0.007565866907437642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,65536,0.19223893483479818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,65536,0.17454293568929036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,16384,0.06091200113296509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,16384,0.055674668153127035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,12288,0.04614400068918864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,12288,0.04538880189259847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,1536,0.009480533003807069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,10240,0.036408531665802005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,10240,0.03951253493626912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,768,0.007203199962774913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,8192,0.029684267441431683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,8192,0.03527466853459676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,256,0.006062933305899302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,512,0.006635733445485433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,4096,128,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,7168,0.027243733406066895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,6144,0.024053333202997844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,7168,0.03135679960250855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,6144,0.029270400603612263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,5120,0.020703999201456706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,5120,0.026763733228047686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,12288,0.03568106492360433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,4096,0.017708800236384072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,4096,0.024280534187952677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,65536,0.19131733576456705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,16384,0.04610453446706136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,3584,0.01606186628341675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,3584,0.023180800676345825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,3072,0.013448533415794373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,3072,0.021894399325052896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,10240,0.030084266265233355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,2560,0.011608533064524333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,2560,0.02062826752662659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,6144,0.01952000061670939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,2048,0.009924266735712688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,2048,0.019526400168736777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,8192,0.025206400950749712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,1536,0.00840106705824534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,1536,0.019082667430241902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,7168,0.02285333275794983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,1024,0.006799999872843425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,1024,0.016777600844701132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,5120,0.0175872008005778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,768,0.00517546683549881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,768,0.016203733285268147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,3072,0.012717866897583007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,512,0.004401066899299621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,512,0.016075733304023742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,4096,0.014918399850527444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,256,0.0037962667644023894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,256,0.015664000312487283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,3584,0.014011733730634055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,128,0.003487999985615412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,128,0.015185067057609558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,2560,0.011774933338165284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,64,0.0032245332996050516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,64,0.015223466356595359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3584,32,0.0034261333445707956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3584,32,0.01532373329003652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,1536,0.008813866972923278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,65536,0.16747627258300782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,65536,0.15970133145650228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,16384,0.0501749316851298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,16384,0.05232959985733032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,2048,0.010469333330790202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,12288,0.04285333156585693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,1024,0.007378133138020833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,12288,0.04330666859944661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,10240,0.03678826491038005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,10240,0.039884801705678305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,768,0.006723199784755707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,8192,0.02790293296178182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,8192,0.0327455997467041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,256,0.005743999779224396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,7168,0.025042132536570234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,7168,0.029958399136861165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,512,0.006122666597366333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,6144,0.022284799814224245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3584,128,0.005492266515890757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,6144,0.027534933884938557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,65536,0.1711359977722168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,5120,0.019853866100311278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,5120,0.025015467405319215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,12288,0.03343146642049154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,4096,0.015158399939537048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,16384,0.04309226671854655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,4096,0.0232149342695872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,3584,0.013963733116785684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,3584,0.02200746734937032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,3072,0.012401066223780314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,3072,0.021733333667119346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,10240,0.02840533256530762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,2560,0.01074666678905487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,2560,0.02011093298594157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,8192,0.023844265937805177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,2048,0.009348266323407491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,2048,0.019400533040364584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,6144,0.018477867046991982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,1536,0.007753600180149078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,1536,0.017369600137074788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,7168,0.021653334299723305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,1024,0.006169599791367849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,1024,0.01679146687189738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,3072,0.012103466192881267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,768,0.0050687998533248905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,768,0.01600106656551361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,4096,0.013993600010871887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,512,0.004217599829037985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,512,0.015820800264676412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,5120,0.016438399751981102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,3584,0.013082666198412576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,256,0.0036629334092140196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,256,0.015292800466219583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,128,0.003307733436425527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,128,0.014898133277893067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,2560,0.011162666479746501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,64,0.003197866678237915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,64,0.01497706671555837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,3072,32,0.0034122665723164878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,3072,32,0.015179733435312906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,65536,0.14461226463317872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,1536,0.007876266539096833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,65536,0.14895680745442708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,16384,0.04355733394622803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,2048,0.00981119970480601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,16384,0.04792746702829997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,12288,0.035884801546732584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,1024,0.0069919998447100324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,12288,0.03898880084355672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,10240,0.032313599189122515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,10240,0.036763731638590494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,128,0.0051125332713127134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,256,0.0054624001185099285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,8192,0.023016534248987832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,768,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,3072,512,0.005801600217819214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,8192,0.03067626754442851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,7168,0.02097813288370768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,7168,0.02818666696548462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,6144,0.01797119975090027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,6144,0.02648746569951375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,5120,0.018563199043273925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,5120,0.024456532796223958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,12288,0.03178240060806274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,65536,0.15425705909729004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,4096,0.014184533556302389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,4096,0.022488532463709514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,3584,0.012600533167521157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,16384,0.041501867771148684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,3584,0.021307732661565146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,3072,0.01172266701857249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,3072,0.020797866582870483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,10240,0.027085866530736285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,2560,0.010033067067464192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,2560,0.019748266537984213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,6144,0.017669334014256795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,2048,0.008740267157554627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,2048,0.017911465962727864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,7168,0.02086826761563619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,8192,0.022664533058802287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,1536,0.007274666428565979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,1536,0.017560533682505288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,1024,0.005345066885153452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,1024,0.016505600015322367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,5120,0.015920000274976094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,768,0.0047989333669344585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,768,0.015818666418393454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,3072,0.011854933698972066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,512,0.004095999896526337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,512,0.01581760048866272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,4096,0.013751467068990072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,256,0.003618133316437403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,256,0.015506133437156677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,3584,0.01297706663608551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,128,0.0032501332461833954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,128,0.015014400084813436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,2560,0.010877866546312969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,64,0.0032000000278155005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,64,0.014993066589037577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,1536,0.007799466451009114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2560,32,0.0033301333586374915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2560,32,0.015101866920789084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,65536,0.11668907006581623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,2048,0.0093941330909729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,65536,0.13352640469868976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,1024,0.006985599795977275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,16384,0.036908801396687826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,16384,0.046641067663828535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,12288,0.03426773150761922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,12288,0.03773866494496663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,768,0.006434133152167003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,10240,0.03009066581726074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,10240,0.03289600014686585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,8192,0.018716800212860107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,256,0.005504000186920166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,512,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,8192,0.029102933406829835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,7168,0.018467199802398682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,7168,0.027281065781911213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2560,128,0.005194666484991709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,6144,0.017829332749048868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,6144,0.025191466013590496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,5120,0.016619732975959776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,5120,0.024101332823435465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,65536,0.15321386655171712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,4096,0.01188800036907196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,16384,0.03510719935099284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,4096,0.02244053284327189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,3584,0.010703999797503154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,12288,0.027876265843709308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,3584,0.02218559980392456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,3072,0.01018346647421519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,3072,0.020096000035603842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,10240,0.022909865776697794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,2560,0.00811413327852885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,2560,0.018130133549372353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,8192,0.019308799505233766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,7168,0.01797440052032471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,2048,0.007259733478228251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,2048,0.017217065890630087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,1536,0.006236800054709116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,6144,0.01611840029557546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,1536,0.017105066776275636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,1024,0.0049333333969116214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,1024,0.016327466567357382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,5120,0.014198399583498635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,768,0.0044383997718493145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,768,0.01593386630217234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,4096,0.012708266576131185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,512,0.003985066711902618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,512,0.015686399737993875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,3072,0.010914132992426554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,256,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,256,0.015266133348147073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,3584,0.011741866668065388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,128,0.0032405334214369455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,128,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,2048,0.008416000008583068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,64,0.0031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,768,0.006200533111890157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,2048,32,0.003328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,64,0.015050666530927024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,1024,0.006461866696675618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,2048,32,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,2560,0.009827199578285217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,65536,0.0919594685236613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,16384,0.03030933340390523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,65536,0.11951680183410644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,16384,0.040807465712229415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,12288,0.026358399788538617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,12288,0.03290133277575175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,10240,0.023534933725992836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,10240,0.029574400186538695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,1536,0.007630933324495952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,8192,0.017476266622543334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,8192,0.026727465788523357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,256,0.005278933544953664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,512,0.0058442667126655575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,7168,0.015537066260973611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,7168,0.025081600745519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,2048,128,0.005108266572157542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,6144,0.013822933038075766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,6144,0.023817600806554158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,5120,0.015105066696802774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,5120,0.02213546633720398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,12288,0.028306132555007933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,4096,0.010709333419799804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,4096,0.020363734165827433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,16384,0.03725546598434448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,65536,0.13982826868693035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,3584,0.009630933403968811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,3584,0.01989226738611857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,3072,0.009116799632708231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,3072,0.0191594660282135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,10240,0.024488532543182374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,2560,0.007912533481915791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,2560,0.01839359998703003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,8192,0.02070080041885376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,2048,0.006962133447329204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,6144,0.016657066345214844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,2048,0.01776426633199056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,7168,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,1536,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,1536,0.01723519961039225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,5120,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,1024,0.004952533543109894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,1024,0.016532267133394875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,768,0.004293333490689596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,768,0.0160778671503067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,4096,0.012824533383051553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,512,0.003819733361403147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,3072,0.010132267077763876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,512,0.015847466389338174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,3584,0.011591466267903645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,256,0.0034293333689371743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,256,0.015153066317240397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,2560,0.009380267063776652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,128,0.0030847998956839246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,128,0.015041066209475198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,64,0.0029343999922275543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,64,0.014839466412862143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1536,32,0.0030933332939942675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1536,32,0.015102932850519816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,65536,0.06726613044738769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,65536,0.10664853254954021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,1536,0.007428266604741414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,16384,0.024449066321055094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,16384,0.03622186581293742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,2048,0.008567466338475545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,12288,0.02039573391278585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,12288,0.032893866300582886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,768,0.006153599917888641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,10240,0.015948800245920818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,10240,0.03034026622772217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,1024,0.006658133367697399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,8192,0.012955733140309653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,8192,0.025498666365941364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,128,0.005070933202902476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,7168,0.011912533640861511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,256,0.00524479995171229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1536,512,0.005809066692988077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,7168,0.02419840097427368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,6144,0.014425599575042724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,6144,0.02320853273073832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,5120,0.013452800114949546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,5120,0.021819732586542764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,65536,0.11724692980448406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,4096,0.010815999905268351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,4096,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,16384,0.030802132685979207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,3584,0.010048000017801921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,3584,0.019618133703867592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,12288,0.024759467442830405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,3072,0.009306666254997254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,3072,0.019345066944758096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,10240,0.021607466538747153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,2560,0.007810133198897044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,2560,0.018476800123850504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,8192,0.017826133966445924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,2048,0.006899199883143107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,2048,0.017545600732167564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,7168,0.016499200463294984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,1536,0.0056650668382644655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,6144,0.014548266927401224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,1536,0.0169322669506073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,1024,0.004825599988301595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,1024,0.016343466440836587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,5120,0.012609066565831504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,768,0.004185600082079569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,768,0.015585066874821982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,4096,0.010770133137702942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,512,0.0037610667447249093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,512,0.015198933084805808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,3584,0.010451199611028035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,3072,0.009598933657010396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,256,0.003234133372704188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,256,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,128,0.0029653333127498626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,128,0.01469546655813853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,2048,0.007860266665617625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,64,0.0028661333024501802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,2560,0.009181867043177288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,64,0.014656000336011252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,1024,32,0.002994133283694585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,1024,0.0062720000743865965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,1024,32,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,65536,0.05508373181025187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,16384,0.019041067361831664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,65536,0.09933013121287028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,768,0.0057546665271123254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,16384,0.0333077331384023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,12288,0.014180266857147216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,1536,0.007316266496976216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,12288,0.031294933954874676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,10240,0.014089600245157877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,10240,0.028258132934570312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,8192,0.012340266505877178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,8192,0.02540053327878316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,512,0.00553599993387858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,256,0.0050805335243542995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,7168,0.011411199967066448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,1024,128,0.004931200047334035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,7168,0.023499733209609984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,6144,0.012231466174125672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,6144,0.022740266720453896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,5120,0.010845866799354554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,5120,0.021000534296035767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,65536,0.11542932987213135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,16384,0.030696533123652142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,4096,0.009325866897900898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,4096,0.020165334145228066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,3584,0.008661333719889324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,12288,0.023885866006215416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,3584,0.019332265853881835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,3072,0.007481599847475688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,10240,0.020786132415135702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,3072,0.018806399901707967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,8192,0.017518933614095053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,2560,0.006761600077152252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,2560,0.017949867248535156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,2048,0.005940266450246175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,2048,0.017169066270192466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,7168,0.01628266672293345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,1536,0.005193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,1536,0.016636799772580466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,6144,0.013803733388582864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,1024,0.004365866879622141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,1024,0.016198399662971496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,5120,0.012128000458081562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,768,0.0040224000811576845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,768,0.015501866738001505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,4096,0.010420266787211101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,512,0.003607466568549474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,512,0.01530346671740214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,3072,0.009425066908200582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,256,0.0032458665470282235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,256,0.015027200182278952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,2048,0.00782400021950404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,128,0.003078400095303853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,128,0.014521599809328715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,3584,0.010220799843470256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,64,0.0028384000062942503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,2560,0.008913066983222962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,64,0.014605866869290671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,768,32,0.002930133293072383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,768,32,0.014668800433476768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,65536,0.04242773453394572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,65536,0.09217387040456136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,16384,0.01264959971110026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,16384,0.029471999406814574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,1024,0.006232533355553945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,12288,0.01134933332602183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,12288,0.02609493335088094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,12288,0.023560533920923867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,1536,0.00703359991312027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,10240,0.010470400253931682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,10240,0.024870399634043375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,8192,0.009040000041325887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,768,0.00572266678015391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,8192,0.02281600038210551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,7168,0.01118933359781901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,65536,0.11512426535288493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,7168,0.022139734029769896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,6144,0.008416000008583068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,512,0.005339733262856802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,6144,0.021826134125391642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,5120,0.008849066495895386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,16384,0.030296534299850464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,256,0.004993066688378652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,5120,0.02039573391278585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,4096,0.00787199983994166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,768,128,0.004891733328501383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,4096,0.019435733556747437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,4096,0.01049066682656606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,3584,0.007592533528804779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,3584,0.019180800517400107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,3072,0.007048533360163371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,3072,0.018451199928919474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,2560,0.00647680014371872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,2560,0.017717333634694417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,2560,0.00881173312664032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,2048,0.005735466877619425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,2048,0.017178666591644288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,10240,0.02039253314336141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,1536,0.004875733455022176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,8192,0.016714666287104288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,1536,0.016749866803487144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,1024,0.004194133480389913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,7168,0.01544319987297058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,1024,0.015901866555213928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,768,0.0038880000511805216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,6144,0.01374613344669342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,768,0.0159061332543691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,768,0.0055861334005991616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,512,0.0034944000343481696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,512,0.015523200233777365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,5120,0.012011733651161195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,256,0.0031370667119820913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,256,0.014882133404413859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,3584,0.010132267077763876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,3072,0.009316266576449076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,128,0.003005866706371307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,128,0.014545067151387533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,64,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,64,0.014719999829928079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,512,32,0.0028245332340399425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,512,32,0.014717866977055868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,2048,0.00783679982026418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,65536,0.02919680078824361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,65536,0.08527146975199382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,16384,0.012119467059771221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,16384,0.02681066592534383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,1536,0.00699839989344279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,12288,0.009480533003807069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,1024,0.00613013356924057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,12288,0.025409066677093507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,10240,0.008450133601824443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,10240,0.0237226665019989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,512,0.005335466563701629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,8192,0.007605333129564922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,256,0.004951466619968414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,8192,0.02204266587893168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,7168,0.007237333556016285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,7168,0.021983999013900756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,512,128,0.004760533571243286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,7168,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,6144,0.008508800466855367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,6144,0.020467199881871543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,6144,0.013418666521708169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,5120,0.007736533383528392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,5120,0.021350399653116862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,5120,0.01199893355369568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,4096,0.007075199981530507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,4096,0.019282132387161255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,3584,0.006695466736952464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,3584,0.019362133741378785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,65536,0.10746666590372722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,3072,0.006454400221506755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,3072,0.0180842657883962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,2560,0.006300800045331319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,16384,0.02919253309567769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,2560,0.017518933614095053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,2048,0.005590400099754334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,2048,0.017035732666651406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,12288,0.022760534286499025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,1536,0.004869333406289419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,1536,0.016451199849446617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,10240,0.019672532876332603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,1024,0.004167466859022776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,1024,0.015920000274976094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,1024,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,768,0.003852800031503042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,8192,0.0164000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,768,0.015320533514022827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,512,0.0035125332574049628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,512,0.015185067057609558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,4096,0.01039573351542155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,256,0.0030975999931494398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,3584,0.010063999891281128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,256,0.014900267124176025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,128,0.002942933390537898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,3072,0.009206400314966837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,128,0.01486186683177948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,128,0.004658133288224538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,2560,0.008794666330019633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,64,0.002881066749493281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,64,0.01477120021979014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,256,32,0.0030464000999927522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,256,32,0.014823466539382935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,2048,0.007730133334795634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,65536,0.02312320073445638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,16384,0.007921066880226136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,65536,0.08114666938781738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,65536,0.10653546651204426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,16384,0.025962666670481367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,12288,0.007972266773382823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,10240,0.007177599767843883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,12288,0.02328746716181437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,10240,0.022874667247136434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,8192,0.006651733318964641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,8192,0.022525866826375328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,1536,0.00701333334048589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,7168,0.00647573322057724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,7168,0.021288534005482994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,7168,0.015260799725850423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,6144,0.006237866481145223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,768,0.0055637334783871974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,6144,0.019901865720748903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,6144,0.013272533814112345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,5120,0.006634666522343953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,5120,0.019913599888483683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,512,0.005368533233801523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,4096,0.0061941335598627726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,4096,0.01020906666914622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,4096,0.019342933098475137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,3584,0.006587733328342438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,3584,0.018589866161346436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,3072,0.006252799928188324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,3072,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,256,256,0.004863999783992767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,3072,0.00901759962240855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,2560,0.006111999849478403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,2560,0.017604267597198485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,16384,0.029041065772374468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,2048,0.0054506664474805195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,12288,0.022915200392405192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,2048,0.017018665870030723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,1536,0.004759466648101807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,10240,0.019516799847284952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,1536,0.01636799971262614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,1536,0.006923733154932658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,1024,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,1024,0.015736533204714458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,8192,0.016577066977818807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,1024,0.006093866626421611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,768,0.003688533355792364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,768,0.015345066785812378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,512,0.0034304000437259674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,512,0.015312000115712484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,512,0.005268266797065735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,256,0.0031221332649389905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,256,0.014941866199175516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,128,0.0028746667007605235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,128,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,5120,0.011937066912651062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,64,0.0027615999182065325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,64,0.01453013320763906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,3584,0.009966933727264404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,128,32,0.0028064000109831494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,128,32,0.014317867159843446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,65536,0.017511467138926186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,16384,0.00697386662165324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,65536,0.07786453564961751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,16384,0.025704532861709595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,12288,0.006321066617965698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,10240,0.006481066842873891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,12288,0.022851200898488362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,10240,0.02217386762301127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,8192,0.006366933385531108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,7168,0.006267733375231425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,8192,0.022139734029769896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,6144,0.00602346658706665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,7168,0.021314134200414024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,5120,0.006477866570154827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,6144,0.019755733013153077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,5120,0.021271467208862305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,4096,0.006004266440868378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,4096,0.019293866554896035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,3584,0.006555733581384023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,2560,0.008662399649620057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,3072,0.006155733267466227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,3584,0.01827413241068522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,3072,0.018147200345993042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,2560,0.006011733412742614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,2048,0.005318399767080943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,2560,0.01769386728604635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,1536,0.004647466540336609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,2048,0.01713706652323405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,1024,0.0039658665657043455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,1536,0.01685653328895569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,2048,0.007625600198904674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,1024,0.015843199690183003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,768,0.0037119999527931214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,768,0.015523200233777365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,512,0.003291733314593633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,128,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,256,0.014845866958300272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,768,0.005721599857012431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,512,0.015260799725850423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,256,0.002977066735426585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,64,0.002657066782315572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,128,0.014673067132631936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,64,32,0.002703999976317088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,64,0.014587733149528503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,64,32,0.01460693379243215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,65536,0.012885333100954691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,16384,0.00621973325808843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,256,0.004971733192602793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,65536,0.0772437334060669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,16384,0.02565866708755493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,10240,0.022523732980092366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,12288,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,12288,0.02309760053952535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,10240,0.006444799900054932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,8192,0.00626986672480901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,256,128,128,0.004781866570313772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,7168,0.00610453337430954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,8192,0.021031467119852702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,6144,0.005982933441797892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,7168,0.02153173287709554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,5120,0.0063274666666984555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,6144,0.019927465915679933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,4096,0.006100266675154368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,3072,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,5120,0.02060799996058146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,3584,0.006381866832574208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,4096,0.019662932554880778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,3584,0.018658133347829182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,3072,0.006016000111897787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,2560,0.006061866879463196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,2048,0.005379199981689453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,2560,0.018241065740585326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,1536,0.004686933259169261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,2048,0.017182934284210204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,1024,0.004054400076468786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,1536,0.016339199741681416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,1024,0.01601599951585134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,768,0.003642666588226954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,768,0.01518186628818512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,512,0.003385599950949351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,512,0.015186132987340293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,256,0.0031082667410373688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,256,0.01490239997704824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,128,0.002828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,128,0.014548266927401224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,64,0.002735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,256,32,32,0.0027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,64,0.014507733782132468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,256,32,32,0.01456106702486674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,12288,0.3220362663269043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,16384,0.41754881540934247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,12288,0.5912607828776042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,10240,0.4957770665486653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,16384,0.7733919779459636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,16384,0.6911306381225586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,10240,0.2716309229532877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,8192,0.2237237294514974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,8192,0.39488531748453776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,7168,0.3473994572957357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,7168,0.19972267150878906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,6144,0.30497280756632483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,6144,0.17717760403951008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,6144,0.27090988159179685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,5120,0.1511189301808675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,5120,0.25437866846720375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,4096,0.20692052841186523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,4096,0.1309717337290446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,3584,0.18270613352457682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,4096,0.17802240053812662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,3584,0.11493226687113445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,3072,0.15958399772644044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,3072,0.1013375997543335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,2560,0.1359829266866048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,2560,0.08917973041534424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,12288,0.5166751861572265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,2560,0.11338773568471272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,2048,0.11814933617909748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,2048,0.07601493199666341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,1536,0.08694612979888916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,2048,0.091157333056132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,10240,0.43430614471435547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,1536,0.06287360191345215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,1024,0.06945919990539551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,8192,0.3672287940979004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,1024,0.05073813199996948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,1024,0.04937599897384644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,768,0.044499198595682785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,768,0.055853867530822755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,768,0.04621973435084025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,7168,0.315666135152181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,512,0.037638401985168456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,512,0.0381877342859904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,256,0.02783573269844055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,256,0.031906133890151976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,128,0.018888533115386963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,128,0.028376533587773638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,128,0.027318400144577027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,64,0.014166399836540222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,64,0.027824000517527266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,65536,32,0.014921599626541137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,65536,32,0.029140265782674153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,5120,0.2254026730855306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,3584,0.15580372810363768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,65536,0.43602771759033204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,65536,0.7651114781697591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,16384,0.19458239873250324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,3072,0.13548906644185382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,65536,0.6739882787068685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,16384,0.1236074686050415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,12288,0.150545072555542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,16384,0.18547733624776203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,12288,0.10053760210673016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,10240,0.12641706466674804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,10240,0.08476906617482503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,8192,0.10121173063913982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,8192,0.07184853553771972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,7168,0.08930986722310384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,7168,0.06483199993769327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,7168,0.08510826428731283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,1536,0.06683200200398763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,6144,0.07932266394297281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,6144,0.058829867839813234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,6144,0.07189760208129883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,5120,0.06669119993845621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,5120,0.05209920008977255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,4096,0.05477973222732544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,5120,0.05989973147710165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,4096,0.04524799982706706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,3584,0.04871360063552856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,4096,0.04945493141810099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,3584,0.04194773435592651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,512,0.03600533405939738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,65536,256,0.03062613407770793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,3584,0.044455464680989584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,3072,0.043068798383076985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,3072,0.038925866285959884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,2560,0.0370741327603658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,2560,0.03502399921417236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,2048,0.030909866094589233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,2048,0.03071039915084839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,1536,0.02487679918607076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,1536,0.026049067576726277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,12288,0.13729599316914876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,1024,0.019602133830388387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,1024,0.02301759918530782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,10240,0.11417280038197834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,8192,0.09635840257008871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,768,0.01511146624883016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,768,0.015099733074506124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,768,0.02113920052846273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,512,0.010153599580128988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,512,0.019808000326156615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,256,0.007411199808120728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,256,0.01730560064315796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,256,0.010850133498509724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,128,0.005554133156935374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,128,0.016659200191497803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,128,0.009755733609199523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,64,0.00487253318230311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,3072,0.038184531529744464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,64,0.016992000738779704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,16384,32,0.005401599903901418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,2560,0.03333546717961629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,16384,32,0.017436800400416057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,2048,0.026897066831588747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,1536,0.02212693293889364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,16384,0.1685642719268799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,16384,0.10048426787058513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,65536,0.3647295951843262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,12288,0.1343509356180827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,65536,0.6056629180908203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,12288,0.0809887965520223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,12288,0.1177898645401001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,10240,0.11709653536478679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,10240,0.0793066660563151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,8192,0.10466773509979248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,8192,0.058715732892354336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,7168,0.08756159941355388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,7168,0.054662398497263585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,7168,0.07569493452707926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,6144,0.07731413046518962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,6144,0.04899199803670247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,1024,0.01646719972292582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,5120,0.06344106594721476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,5120,0.04366933504740397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,4096,0.04151360193888347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,16384,512,0.012702932953834534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,4096,0.03818133274714152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,3584,0.03744959831237793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,3584,0.03557013273239136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,16384,0.15519572893778483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,3072,0.03333973487218221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,3072,0.03309973279635112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,65536,0.5814485549926758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,10240,0.099944535891215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,2560,0.0322762668132782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,2560,0.02950506607691447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,8192,0.08506666819254557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,2560,0.029204267263412475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,2048,0.023654399315516154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,2048,0.02615999976793925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,1536,0.019048533837000527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,1536,0.0236735999584198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,1024,0.013017599781354269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,6144,0.06408640146255493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,1024,0.021244800090789794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,768,0.01072746713956197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,768,0.02032426595687866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,5120,0.053692801793416345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,512,0.008505599697430928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,4096,0.04431466658910115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,512,0.018203733364741008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,256,0.005565866827964783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,3584,0.038984533150990805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,256,0.016709333658218382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,128,0.004664533336957296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,128,0.016290133198102318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,3072,0.035206401348114015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,64,0.004222933451334635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,64,0.01633386711279551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,12288,32,0.0045162667830785114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,12288,32,0.016806399822235106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,2048,0.02469546596209208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,1536,0.019602133830388387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,1024,0.01480959951877594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,768,0.013127467036247254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,65536,0.3200021425882975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,16384,0.13729920387268066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,65536,0.4999146779378255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,16384,0.09357972939809164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,12288,0.10121386845906574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,512,0.010514133175214132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,12288,0.07506133715311686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,10240,0.06423360109329224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,256,0.008653866251309712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,10240,0.08905920187632242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,8192,0.07093760172526041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,8192,0.05348480145136515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,12288,128,0.007769600053628286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,7168,0.06000426610310873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,7168,0.04926506678263347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,6144,0.05595946709314982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,6144,0.0447872002919515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,16384,0.1445290724436442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,65536,0.5442239761352539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,12288,0.1089962641398112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,5120,0.04698453346888225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,5120,0.05125120083491007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,4096,0.03640213410059611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,5120,0.041366398334503174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,4096,0.03519359827041626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,10240,0.09394453366597494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,3584,0.03330986698468526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,3584,0.037666134039560956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,3072,0.030010666449864703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,3072,0.030646399656931562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,8192,0.07984106540679932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,2560,0.025594667593638105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,2560,0.027730133136113482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,2048,0.021639466285705566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,2560,0.027040000756581622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,7168,0.07130133310953776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,2048,0.025143466393152875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,1536,0.017268266280492148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,1024,0.012481066584587096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,1536,0.022987733284632363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,6144,0.060525866349538174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,1024,0.02050986687342326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,768,0.010451199611028035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,768,0.01948266625404358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,512,0.00766186664501826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,512,0.017802667617797852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,4096,0.04210133155186971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,256,0.005399466554323832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,3584,0.0367520014444987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,256,0.016362667083740234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,3072,0.03307200074195862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,128,0.004503466685612996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,128,0.015879467129707336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,2048,0.022780799865722658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,64,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,10240,32,0.004273066421349844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,1536,0.018279467026392618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,64,0.016133333245913185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,10240,32,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,1024,0.013702399532000222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,768,0.012103466192881267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,16384,0.10204906463623047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,512,0.00944533348083496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,16384,0.07649813493092855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,65536,0.2600042661031087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,16384,0.09514346917470297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,65536,0.4177866617838542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,12288,0.06197546720504761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,12288,0.07969919840494791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,10240,0.06869440078735352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,8192,0.05379093488057455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,8192,0.045184000333150225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,10240,0.06052480141321818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,8192,0.049958399931589764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,7168,0.04817813237508138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,7168,0.042453332742055254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,6144,0.04276586771011352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,6144,0.038627199331919354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,5120,0.03691626787185669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,5120,0.03480213483174642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,256,0.007693866888682048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,4096,0.028952533006668092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,10240,128,0.007034666836261749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,4096,0.030477867523829145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,4096,0.028250666459401448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,3584,0.02598293423652649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,3584,0.028486400842666626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,3072,0.023257599274317423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,3072,0.026044799884160356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,65536,0.3613173484802246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,12288,0.07253013451894125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,2560,0.022552533944447836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,10240,0.06022186676661173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,2560,0.024260266621907552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,2048,0.016275200247764587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,2048,0.022555732727050783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,2048,0.016953599452972413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,1536,0.01264959971110026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,7168,0.0446997324625651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,1536,0.020745599269866945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,1024,0.009616000453631084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,1024,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,6144,0.03908160130182902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,768,0.00823359986146291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,768,0.01698026657104492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,5120,0.033214932680130003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,512,0.0070592001080513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,512,0.016772266228993735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,512,0.008387200037638346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,256,0.004549333453178405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,256,0.016269866625467935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,3584,0.025032534201939897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,128,0.004030933231115341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,128,0.015466666221618653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,3072,0.022706133127212525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,64,0.003642666588226954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,64,0.015636266271273295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,8192,32,0.003984000037113826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,8192,32,0.016109866897265117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,2560,0.019509333372116088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,1536,0.014274133245150247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,65536,0.243996795018514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,65536,0.36428372065226233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,1024,0.011467732985814412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,16384,0.134115203221639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,16384,0.0904362678527832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,16384,0.08233173688252768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,768,0.010099200407663982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,12288,0.08004053433736166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,12288,0.057323733965555825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,10240,0.08932159741719564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,10240,0.05096533298492432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,8192,0.04953386783599854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,8192,0.04752746820449829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,8192,0.04233280022939046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,7168,0.041978665192921955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,256,0.007421866556008657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,6144,0.037375998497009275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,7168,0.03946026563644409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,6144,0.03641706705093384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,6144,0.036253865559895834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,5120,0.03183573285738627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,4096,0.026228266954421996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,5120,0.03317226568857829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,4096,0.028460800647735596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,3584,0.023644800980885824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,3584,0.027195733785629273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,8192,128,0.006930133203665416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,3072,0.021115734179814657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,3072,0.025462400913238526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,65536,0.33558613459269204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,12288,0.0687328020731608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,2560,0.020272000630696615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,2560,0.02459413409233093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,10240,0.05845973491668701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,2560,0.018235733111699425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,2048,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,1536,0.011677866180737812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,2048,0.022078933318456014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,1536,0.02149440050125122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,7168,0.04203519821166992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,1024,0.009083732962608337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,1024,0.01886826753616333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,5120,0.030958932638168336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,768,0.00783679982026418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,768,0.009013332923253377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,768,0.018152532974878947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,512,0.0064181332786877945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,4096,0.026473599672317504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,512,0.01709866722424825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,256,0.00459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,256,0.0167413334051768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,128,0.004252799848715464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,3584,0.023037866751352946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,128,0.015833600362141927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,64,0.003928533444801966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,64,0.015980799992879234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,7168,32,0.00420906643072764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,7168,32,0.016196266810099284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,3072,0.021409066518147786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,65536,0.3129376093546549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,65536,0.21602346102396647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,16384,0.0928394635518392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,16384,0.06600000063578287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,16384,0.08377280235290527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,12288,0.08795093695322673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,2048,0.015416533748308817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,1536,0.0127402663230896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,12288,0.054378668467203774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,10240,0.07381227016448974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,10240,0.05019199848175049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,1024,0.010605866710344952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,8192,0.04394986629486084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,8192,0.04202880064646403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,7168,0.036848000685373944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,512,0.007574399809042613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,7168,0.038341331481933597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,256,0.0065077334642410275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,6144,0.033505066235860186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,6144,0.0343722661336263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,7168,128,0.0058783998092015585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,5120,0.03004266619682312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,5120,0.03191039959589641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,5120,0.029531733194986982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,4096,0.023090134064356484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,4096,0.027999999125798543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,65536,0.30347731908162434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,3584,0.023538132508595787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,3584,0.026415999730428057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,12288,0.06378133296966552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,3072,0.019244800011316933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,3072,0.024540799856185912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,10240,0.05501546859741211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,2560,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,8192,0.04469013214111328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,2560,0.023277866840362548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,2560,0.017240534226099648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,2048,0.013246933619181315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,2048,0.02138026754061381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,1536,0.010379733641942342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,7168,0.039867734909057616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,1536,0.0199072003364563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,6144,0.0344703992207845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,1536,0.012160000205039979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,1024,0.00804799993832906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,1024,0.017858133713404337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,768,0.0068917334079742435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,768,0.017042134205500284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,512,0.005420800050099691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,512,0.016777600844701132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,4096,0.025174399216969807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,256,0.0041461333632469176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,256,0.015672533710797628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,128,0.0037162666519482933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,128,0.015278933445612588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,3584,0.022446932395299275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,64,0.0034965333839257562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,64,0.015723733107248943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,6144,32,0.0037151999771595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,6144,32,0.015756799777348836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,3072,0.019833600521087645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,65536,0.254312531153361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,2048,0.015188266833623251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,65536,0.18846720059712727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,16384,0.06742506821950277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,16384,0.05978986819585165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,12288,0.05306346813837687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,1024,0.010095999638239542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,12288,0.04872533480326335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,768,0.008489599823951722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,10240,0.047975468635559085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,512,0.007503999769687653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,10240,0.04238933324813843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,8192,0.038970665136973066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,8192,0.03701653480529785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,7168,0.03477546771367391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,256,0.006375466783841451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,7168,0.03454080025355021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,7168,0.03680213292439778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,6144,0.02873493234316508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,6144,128,0.005850666761398315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,6144,0.03186560074488322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,5120,0.025805866718292235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,5120,0.02837226589520772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,4096,0.021574399868647256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,4096,0.025856000185012818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,16384,0.07886933485666911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,3584,0.019452800353368126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,65536,0.2816704114278158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,3584,0.024553600947062174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,12288,0.060420266787211095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,3072,0.016081066926320393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,3072,0.0233130673567454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,3072,0.019219199816385903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,10240,0.0510101318359375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,2560,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,2560,0.02209279934565226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,2048,0.011145599683125814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,2048,0.020696532726287842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,8192,0.04165759881337484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,1536,0.009360000491142273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,1536,0.01944640080134074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,1024,0.007326933244864146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,1024,0.01741973360379537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,6144,0.03221653302510579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,768,0.006735999882221222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,5120,0.027905066808064777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,768,0.01664959987004598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,512,0.005299200117588043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,512,0.01623466710249583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,4096,0.02384213407834371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,256,0.004009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,256,0.01573973298072815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,3584,0.021321600675582884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,128,0.003772799919048945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,128,0.015177599589029946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,2560,0.01625706652800242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,64,0.0035274667044480645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,2048,0.013959466417630514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,1536,0.011660800377527872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,5120,32,0.00352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,64,0.015293866395950317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,5120,32,0.0151936004559199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,1024,0.009366400043169658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,65536,0.21028374036153158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,65536,0.16309119860331217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,16384,0.061843200524648034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,16384,0.056435199578603115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,768,0.0078005333741505934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,16384,0.06719466845194498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,12288,0.0441866676012675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,12288,0.04572906494140625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,512,0.00687360018491745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,10240,0.052306131521860755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,10240,0.03830506801605225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,8192,0.029360000292460126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,8192,0.032015999158223465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,256,0.006037333110968272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,7168,0.029345067342122395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,7168,0.029848533868789672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,7168,0.03381119966506958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,6144,0.026423466205596925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,6144,0.0280458668867747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,5120,0.021639466285705566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,5120,0.02604373296101888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,5120,128,0.005500799914201101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,4096,0.016670932372411094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,4096,0.023411200443903605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,3584,0.014906666676203408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,3584,0.02260800004005432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,65536,0.2586080074310303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,3072,0.014435199896494546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,12288,0.05332479874293009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,3072,0.02167466680208842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,10240,0.044334932168324785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,2560,0.011681066950162251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,2560,0.020359466473261513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,8192,0.03667946656545003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,2048,0.009822932879130046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,2048,0.019326933224995933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,1536,0.008238933483759562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,1536,0.017802667617797852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,6144,0.028630399703979494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,1024,0.006617600222428639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,1024,0.01691733400026957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,5120,0.024333866437276204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,768,0.005437866846720377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,768,0.016405333081881204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,4096,0.021185066302617392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,512,0.00453653335571289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,512,0.015936000148455302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,3584,0.019517866770426433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,256,0.003869866579771042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,256,0.016081066926320393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,3072,0.01767680048942566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,2560,0.015130666891733804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,128,0.0035402665535608927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,128,0.015282133221626281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,2048,0.013059199849764506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,128,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,64,0.0032597333192825317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,4096,32,0.0034858666360378264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,64,0.015213867028554281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,1536,0.011117866635322571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,4096,32,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,16384,0.0562549352645874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,65536,0.18595199584960936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,1024,0.0086709330479304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,65536,0.15201600392659503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,16384,0.05740799903869629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,12288,0.03914560079574585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,768,0.007973333199818928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,12288,0.044750932852427164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,10240,0.038372266292572024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,8192,0.0401472012201945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,10240,0.039826134840647384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,8192,0.03473600149154663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,7168,0.03511679967244466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,7168,0.031150933106740313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,512,0.007031466563542683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,6144,0.021741867065429688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,6144,0.02866133252779643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,4096,256,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,5120,0.025868799289067584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,5120,0.026103466749191284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,65536,0.24876267115275064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,16384,0.07074560324350992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,4096,0.02040533423423767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,12288,0.05349973440170288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,4096,0.024076799551645912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,3584,0.0192138671875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,10240,0.046349867184956865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,3584,0.02267306645711263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,3584,0.018976000944773357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,3072,0.015422933300336204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,8192,0.03853760162989299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,3072,0.021577600638071695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,2560,0.012796800335248312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,2560,0.02048426667849223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,2048,0.010339200496673584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,2048,0.019564799467722573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,7168,0.03378346761067708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,1536,0.008424533406893413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,1536,0.01741546591122945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,6144,0.029739733537038165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,1024,0.006840533514817555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,1024,0.016275200247764587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,5120,0.02518293261528015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,768,0.005460266768932342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,768,0.016244266430536905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,4096,0.02132693330446879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,512,0.0045514668027559916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,512,0.016151466965675355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,3072,0.01737066706021627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,256,0.003952000041802724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,2560,0.015110400319099427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,256,0.015520000457763672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,2048,0.012707199652989706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,128,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,128,0.005321600039800008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,128,0.015025066335995993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,1536,0.010099200407663982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,64,0.0033290666838486993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,64,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3584,32,0.003504000107447306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3584,32,0.015367466211318969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,16384,0.05466986497243246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,65536,0.13950613339742024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,65536,0.16329174041748046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,16384,0.04679893255233765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,12288,0.04358293215433757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,12288,0.04012480179468791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,10240,0.03547413349151611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,10240,0.035019731521606444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,1024,0.008371200164159138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,8192,0.025177599986394246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,768,0.007366399963696797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,8192,0.03108479976654053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,7168,0.0217141330242157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,7168,0.026735999186833698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,512,0.006485333542029063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,6144,0.02114560008049011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3584,256,0.005757866799831391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,6144,0.02853226661682129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,5120,0.016193067034085594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,5120,0.02579093376795451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,65536,0.24105067253112794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,16384,0.06830933094024658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,4096,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,12288,0.052538665135701504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,4096,0.02353066603342692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,3584,0.014430933197339377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,3584,0.022191999355951945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,3072,0.013569066921869913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,3072,0.021245867013931274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,10240,0.04434560139973958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,2560,0.011246933539708456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,2560,0.020513067642847695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,8192,0.03709760109583537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,2048,0.009619200229644775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,2048,0.019771732886632285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,7168,0.03279786705970764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,1536,0.007896533111731212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,1536,0.01758079926172892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,1536,0.009821866949399311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,1024,0.0060810665289560955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,1024,0.016458666324615477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,6144,0.02838933269182841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,768,0.005193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,768,0.016394666830698647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,768,0.0071285332242647815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,5120,0.024195200204849242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,512,0.00439573327700297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,512,0.016122666994730632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,256,0.003822933385769526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,256,0.015299200018246969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,4096,0.020486400524775187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,128,0.0034613333642482757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,3584,0.018675200144449868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,128,0.014986667037010192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,64,0.0032821332414944967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,3072,0.016594133774439492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,64,0.014986667037010192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,3072,32,0.0033919999996821085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,3072,32,0.015069866180419922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,2560,0.014169599612553915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,65536,0.1404245376586914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,65536,0.12553706963857014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,16384,0.04294293324152629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,16384,0.043611733118693034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,12288,0.03352320194244385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,2048,0.012499200304349263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,12288,0.03645439942677815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,10240,0.02534079949061076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,10240,0.03237333297729492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,1024,0.008062933385372163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,8192,0.020844799280166627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,8192,0.02762239972750346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,512,0.006278400123119354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,7168,0.02108479936917623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,256,0.005637333293755849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,7168,0.026255999008814496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,6144,0.01835093299547831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,3072,128,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,6144,0.02498133381207784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,65536,0.23452372550964357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,16384,0.06631040175755819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,5120,0.013729066650072733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,12288,0.05090879996617635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,5120,0.02369173367818197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,4096,0.011454932888348897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,4096,0.02142613331476847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,3584,0.010506666700045268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,10240,0.04294293324152629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,3584,0.0209824005762736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,3584,0.01804373264312744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,3072,0.010943999886512757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,3072,0.020259199539820354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,2560,0.00860053300857544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,2560,0.019478400548299156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,8192,0.03541546662648519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,2048,0.008525866270065307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,2048,0.01790293256441752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,7168,0.03216106692949931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,1536,0.007301333546638489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,1536,0.01715946594874064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,6144,0.027846399943033857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,1024,0.005418666700522105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,1024,0.01613653302192688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,5120,0.02278613249460856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,768,0.004716800153255462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,768,0.015913599729537965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,512,0.004033066580692927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,4096,0.020474666357040407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,512,0.01570026675860087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,256,0.0035061334570248926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,256,0.01509119967619578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,3072,0.01609280010064443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,128,0.0032853332658608755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,128,0.014994133512179056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,2560,0.0141184002161026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,64,0.0031306666632493338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,64,0.014971733093261719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2560,32,0.0032245332996050516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,2048,0.011563733220100403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2560,32,0.01493333379427592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,65536,0.11484800179799397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,65536,0.11265280246734619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,16384,0.040046934286753336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,16384,0.03996586799621582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,1536,0.009628799557685853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,12288,0.03160533308982849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,12288,0.032986666758855185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,1024,0.007910400132338206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,10240,0.027859199047088622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,10240,0.02860693335533142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,768,0.007111466427644093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,8192,0.01919040083885193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,512,0.006097066899140676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,8192,0.028488532702128096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,7168,0.017423999309539796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,256,0.0055871998270352686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,7168,0.026682666937510174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,6144,0.01532373329003652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,6144,0.02544426719347636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2560,128,0.00518506666024526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,5120,0.017828265825907387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,5120,0.023730132977167764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,65536,0.21628160476684571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,16384,0.058956801891326904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,4096,0.014549332857131957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,4096,0.021995733181635536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,3584,0.012817066907882691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,3584,0.02103040019671122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,12288,0.04608319997787476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,3072,0.011458133657773335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,3072,0.019591466585795084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,10240,0.03893760045369466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,2560,0.010454400380452474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,2560,0.020258132616678873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,8192,0.032059733072916666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,2048,0.008671999971071879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,2048,0.018422400951385497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,7168,0.02807146708170573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,1536,0.00606826643149058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,1536,0.017198934157689413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,6144,0.024969599644343057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,1024,0.004861866434415182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,1024,0.016376533110936484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,5120,0.021514666080474854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,768,0.0042303999265035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,768,0.016082132856051125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,4096,0.0182805339495341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,512,0.0037941334148248037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,512,0.015920000274976094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,3584,0.017348267634709678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,256,0.0033952000240484873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,256,0.015064533551534018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,3072,0.014841600259145101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,128,0.003142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,128,0.014812800288200378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,2560,0.01304213305314382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,64,0.0030154667794704436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,2048,0.010818133751551311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,64,0.014965333541234336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,2048,32,0.0032437334458033243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,2048,32,0.014877866705258688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,1536,0.009244799613952637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,65536,0.0931541363398234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,65536,0.1024469296137492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,1024,0.007670400043328603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,16384,0.0300437331199646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,16384,0.03726826508839925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,768,0.006939733525117238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,12288,0.023995733261108397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,12288,0.030882134040196733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,10240,0.02104746699333191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,10240,0.02765013376871745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,512,0.0060703997810681665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,8192,0.017246933778127034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,8192,0.025561600923538208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,256,0.005634133517742157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,7168,0.01476800044377645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,7168,0.024291199445724488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,2048,128,0.005110399921735128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,6144,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,6144,0.023253333568573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,5120,0.011515733599662781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,5120,0.02140480081240336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,65536,0.2081472078959147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,16384,0.05983999967575073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,4096,0.010935466488202412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,4096,0.01994453271230062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,12288,0.04610346555709839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,3584,0.010067199667294819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,3584,0.020386133591334024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,3072,0.009224533041318258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,3072,0.0184714674949646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,10240,0.03921813170115153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,2560,0.008200533191363017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,2560,0.01851946711540222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,8192,0.03227306604385376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,2048,0.0073077330986658735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,2048,0.017409066359202065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,7168,0.02943466703097026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,1536,0.005852800110975901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,1536,0.016676266988118492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,6144,0.026387200752894087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,1024,0.004786133269468943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,1024,0.01623253325621287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,5120,0.02212160031000773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,768,0.00425493319829305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,768,0.015501866738001505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,4096,0.018662399053573607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,512,0.0038111999630928038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,3584,0.016539733608563742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,512,0.01548479994138082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,256,0.003365333378314972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,256,0.014990933736165366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,3072,0.014519466956456503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,128,0.0031914666295051576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,128,0.014712533354759217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,2560,0.012748799721399941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,64,0.003028266628583272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,64,0.014830933014551798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1536,32,0.003058133274316788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1536,32,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,65536,0.06490240097045899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,65536,0.09261759916941324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,2048,0.010777599612871806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,16384,0.023909332354863484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,16384,0.03130240043004354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,1536,0.009078400333722432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,12288,0.019222400585810342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,12288,0.02942933241526286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,1024,0.007635200023651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,10240,0.015185067057609558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,10240,0.02659200032552083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,768,0.006692266464233399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,10240,0.03637760082880656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,8192,0.012922666470209756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,8192,0.02336639960606893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,7168,0.012088533242543538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,7168,0.023487999041875204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,512,0.005938133100668589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,6144,0.012110933661460876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,256,0.0053845331072807315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,6144,0.022348799308141074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,5120,0.01076693336168925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1536,128,0.004977066814899444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,5120,0.020555732647577922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,4096,0.009300266702969868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,4096,0.019847466548283895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,65536,0.18761919339497884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,3584,0.008540800213813782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,16384,0.053352534770965576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,3584,0.018901334206263224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,3072,0.007595733304818471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,3072,0.01835626761118571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,12288,0.041867733001708984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,2560,0.007275733351707459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,2560,0.017416532834370932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,8192,0.028993066151936846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,7168,0.026410667101542155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,2048,0.006250666578610738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,2048,0.01728106737136841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,6144,0.02359573245048523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,1536,0.005250133574008942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,1536,0.01653333306312561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,5120,0.0195743997891744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,1024,0.004477866490681966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,4096,0.016309332847595216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,1024,0.015843199690183003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,768,0.004041599979003271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,768,0.015340800086657206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,3584,0.015310933192571005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,512,0.0036447999378045404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,512,0.015377066532770791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,3072,0.013705600301424661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,256,0.0032117334504922234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,256,0.01527679959932963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,2560,0.011940266688664753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,128,0.0030239999294281008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,128,0.014595199624697366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,64,0.002919466545184453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,64,0.014436266819636025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,2048,0.01015786627928416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,1024,32,0.0029237332443396253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,1024,32,0.014849066734313965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,65536,0.051427201430002845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,1536,0.00890239973862966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,65536,0.08005759716033936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,16384,0.01648319959640503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,1024,0.007229866584142049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,16384,0.029997867345809937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,12288,0.015427199999491372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,12288,0.028819199403127032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,10240,0.012417067090670269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,768,0.006543999910354615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,10240,0.026024534304936724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,8192,0.012676266829172769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,8192,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,8192,0.024405332406361897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,7168,0.013699199755986533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,7168,0.022712532679239908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,512,0.005789866546789805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,6144,0.012223999698956807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,256,0.005234133203824361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,6144,0.020753065745035805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,5120,0.010792533556620281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,5120,0.020653865734736123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,1024,128,0.004871466755867004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,4096,0.009405866265296936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,4096,0.019474132855733236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,65536,0.16008106867472333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,16384,0.04442453384399414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,3584,0.00890880028406779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,3584,0.018348799149195353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,3072,0.0077461332082748415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,12288,0.03461013237635295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,3072,0.01778986652692159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,2560,0.006596266726652781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,10240,0.02913600007692973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,2560,0.017497599124908447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,2048,0.005830400188763936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,2048,0.017564799388249716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,7168,0.022011733055114745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,1536,0.005193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,1536,0.01657386620839437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,6144,0.019460266828536986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,1024,0.004539733131726583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,5120,0.016642133394877114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,1024,0.015753600001335143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,768,0.004113066693147024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,768,0.015431466698646545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,4096,0.014422399799029031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,512,0.003676799933115641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,512,0.015848533312479655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,3584,0.013090133666992188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,256,0.0032885332902272543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,3072,0.011688533425331115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,256,0.014758400122324624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,128,0.0030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,2560,0.010318932930628459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,128,0.014365866780281067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,64,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,2048,0.008931199709574383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,64,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,768,32,0.002762666592995326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,768,32,0.014436266819636025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,65536,0.03950506846110026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,65536,0.07292266686757407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,16384,0.013392000397046407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,1536,0.008091733356316884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,16384,0.026678399244944258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,12288,0.011331199606259664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,12288,0.024052266279856363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,1024,0.006716800232728322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,10240,0.011512533823649088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,10240,0.02333973248799642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,768,0.006182399888833364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,8192,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,8192,0.020801067352294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,512,0.005459199845790863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,256,0.005148800214131674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,7168,0.00965013305346171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,7168,0.02161173423131307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,768,128,0.004861866434415182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,6144,0.008769067128499348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,6144,0.02066453297932943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,65536,0.1243125359217326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,5120,0.008332799871762593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,5120,0.019927465915679933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,16384,0.03386773268381755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,4096,0.007272533575693766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,4096,0.0190720001856486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,12288,0.026096000274022417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,3584,0.0076682666937510175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,3584,0.018441599607467652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,3584,0.010493866602579753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,3072,0.006909866631031036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,3072,0.017854932943979898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,10240,0.022207999229431154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,3072,0.009410132964452107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,2560,0.006295466423034668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,2560,0.017013333241144814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,2048,0.005520000060399374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,2048,0.016794667641321818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,8192,0.017858133713404337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,1536,0.004926933348178864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,1536,0.01624853312969208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,1024,0.004220800101757049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,1024,0.015708800156911215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,1024,0.006198399762312571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,768,0.003851733356714249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,768,0.015583999951680503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,7168,0.016108799974123636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,512,0.003551999976237615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,512,0.01543786625067393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,512,0.00547733356555303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,6144,0.014677332838376364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,256,0.003142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,256,0.01492693324883779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,128,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,128,0.014566399653752646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,5120,0.012827733159065246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,64,0.0028480000793933867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,64,0.01460693379243215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,512,32,0.0028490667541821797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,4096,0.0113237331310908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,512,32,0.014546133081118264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,65536,0.02691626747449239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,65536,0.06610346635182698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,65536,0.101528533299764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,16384,0.009815466403961182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,16384,0.025565866629282636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,16384,0.02744106650352478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,12288,0.008945066730181377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,12288,0.022855466604232787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,12288,0.021497599283854165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,10240,0.010170666376749675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,10240,0.022395733992258707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,10240,0.018636800845464072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,2560,0.008728532989819845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,8192,0.008190933366616566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,8192,0.020061866442362467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,8192,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,7168,0.007881600161393483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,2048,0.007614933451016744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,7168,0.020407466093699138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,7168,0.014186666409174601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,6144,0.007386666536331177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,1536,0.007188266515731812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,6144,0.019476266702016194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,5120,0.006837333242098491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,5120,0.019612799088160195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,5120,0.011392000317573547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,4096,0.006388266881306966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,4096,0.018887466192245482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,4096,0.01042133371035258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,3584,0.006761600077152252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,3584,0.018269866704940796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,3072,0.006375466783841451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,2560,0.006185600161552429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,3072,0.01790293256441752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,768,0.0056639999151229855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,2560,0.017340799172719322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,2048,0.005565866827964783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,2048,0.017034665743509928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,2048,0.007222400108973186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,1536,0.0048213332891464235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,1536,0.016292267044385276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,1536,0.006799999872843425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,1024,0.004099199920892716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,1024,0.01562879979610443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,768,0.00383786658445994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,256,0.004980266590913137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,768,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,512,0.0035071998834609987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,512,0.015014400084813436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,512,128,0.004786133269468943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,512,0.005267199873924255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,256,0.0031221332649389905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,256,0.014632532993952433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,6144,0.012894933422406515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,128,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,128,0.014486400286356607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,3584,0.010012800494829815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,64,0.0027872001131375628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,3072,0.008874666690826417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,256,32,0.0028042666614055633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,64,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,2560,0.008258133133252462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,256,32,0.014350933829943338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,65536,0.02004479964574178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,65536,0.061622401078542076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,16384,0.008094933132330577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,16384,0.027157332499821978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,16384,0.023652267456054688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,1024,0.005940266450246175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,12288,0.007390933235486348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,12288,0.02151040037473043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,10240,0.007097599903742473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,768,0.005625600119431814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,10240,0.02104426622390747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,10240,0.018619734048843383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,8192,0.006674133241176605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,8192,0.02023573319117228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,7168,0.006410666803518932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,7168,0.020142932732899986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,6144,0.006155733267466227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,256,0.004901333153247834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,6144,0.0196234663327535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,5120,0.006525866687297821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,5120,0.011553066968917846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,5120,0.019513599077860513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,4096,0.006120533247788747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,4096,0.018644267320632936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,3584,0.006589866677920024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,3584,0.017859200636545815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,256,128,0.004727466901143392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,3072,0.006202666461467743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,3072,0.01788160006205241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,65536,0.1006656010945638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,2560,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,2560,0.017410133282343546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,12288,0.02152106761932373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,2560,0.00805866668621699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,2048,0.0053941334287325535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,2048,0.01690666675567627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,8192,0.015449600418408713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,1536,0.004659200211366018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,7168,0.01436906655629476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,1536,0.016380799810091655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,1024,0.004037333279848098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,1024,0.015561599532763162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,1024,0.006033066908518473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,768,0.003669333209594091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,6144,0.012756266196568809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,768,0.015244799852371215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,512,0.003409066547950109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,512,0.01558080017566681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,256,0.003091199944416682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,256,0.014603733023007711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,4096,0.010291199882825215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,256,0.005005866785844167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,128,0.002916266769170761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,128,0.014573867122332254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,128,0.004681600133577982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,64,0.002776533365249634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,128,32,0.002734933296839396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,64,0.014597333470980325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,3584,0.009794132908185323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,128,32,0.014651733636856078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,65536,0.011285332838694255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,16384,0.00747519979874293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,65536,0.05708693265914917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,16384,0.023462400833765665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,12288,0.0065077334642410275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,10240,0.006542933483918508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,12288,0.021513599157333373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,8192,0.006410666803518932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,10240,0.021316266059875487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,8192,0.02004479964574178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,7168,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,6144,0.0060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,7168,0.02064746618270874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,6144,0.019805866479873657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,5120,0.006380799909432728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,4096,0.006020266811052958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,5120,0.01973973313967387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,3584,0.006481066842873891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,4096,0.01916266679763794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,3072,0.00876800020535787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,3072,0.0063967997829119366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,3584,0.018182400862375894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,2560,0.01708586613337199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,2560,0.006037333110968272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,3072,0.017538134256998697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,2048,0.005420800050099691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,2048,0.017081600427627564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,1536,0.004667733112970988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,1536,0.016034133235613503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,1024,0.004036266605059306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,1024,0.01583999991416931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,768,0.003585066646337509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,512,0.003366400053103765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,768,0.015305599570274353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,512,0.014935466647148132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,256,0.002977066735426585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,128,0.0027722666660944624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,256,0.014617600043614707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,128,0.014539733529090881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,64,0.0026410666604836782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,2048,0.007085866729418437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,32,0.014330666263898215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,64,64,0.01458026667435964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,64,32,0.0026367999613285064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,1536,0.006870399912198384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,65536,0.00988266666730245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,16384,0.006357333560784657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,65536,0.05658026536305746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,12288,0.006211199859778086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,12288,0.021588265895843506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,16384,0.02370880047480265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,10240,0.006489600241184235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,768,0.005529599885145823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,192,128,512,0.0052255998055140175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,8192,0.006270933151245117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,10240,0.021413334210713706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,8192,0.019990400473276774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,7168,0.0061749334136645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,7168,0.021075199047724404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,4096,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,6144,0.005948799848556519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,5120,0.006375466783841451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,6144,0.01925546725591024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,5120,0.020362667242685952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,4096,0.01899306575457255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,3584,0.006421333551406861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,3072,0.0060138667623202005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,3584,0.018438400824864705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,3072,0.017672532796859743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,2560,0.005955199897289276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,2048,0.0053045332431793215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,2560,0.01732800006866455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,2048,0.016923733552296958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,1536,0.004603733122348785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,1536,0.016218666235605875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,1024,0.0040224000811576845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,1024,0.01590079963207245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,256,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,768,0.003623466690381368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,768,0.015256533026695251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,512,0.003401600072781245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,512,0.014929067095120749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,128,0.002799999962250392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,256,0.01470080018043518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,128,0.014965333541234336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,64,0.002700799951950709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,192,32,32,0.002755200117826462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,64,0.014270933469136557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,192,32,32,0.014416000247001648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,12288,0.32055253982543946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,16384,0.41756054560343425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,12288,0.5913365046183269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,10240,0.4951061248779297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,16384,0.774228286743164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,12288,0.5454528172810872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,10240,0.27053759892781576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,8192,0.22598506609598795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,8192,0.39651734034220376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,7168,0.19724267323811848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,7168,0.35681174596150717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,8192,0.40322345097859696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,6144,0.3037941296895345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,6144,0.17274667421976725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,5120,0.253985071182251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,5120,0.14809494018554686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,4096,0.20734720230102538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,4096,0.12442879676818848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,5120,0.2535967985788981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,3584,0.18148372968037924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,3584,0.11178133487701417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,3584,0.17483733495076498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,3072,0.15773332913716634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,3072,0.10002986590067546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,2560,0.08759146531422933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,2560,0.13300800323486328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,2560,0.1252832015355428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,2048,0.1096010684967041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,2048,0.07437866528828939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,1536,0.08584853013356528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,1536,0.06136746803919474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,16384,0.7209546407063802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,1024,0.06841066678365072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,10240,0.4736085255940755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,1024,0.04893759886423747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,768,0.04861013491948445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,768,0.04215466578801473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,7168,0.3554250717163086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,6144,0.30437545776367186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,512,0.03945813179016113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,512,0.034162131945292155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,512,0.035045333703358966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,256,0.022010666131973267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,4096,0.19636799494425455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,256,0.02616746624310811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,128,0.013422933220863343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,128,0.022733867168426514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,3072,0.14883519808451334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,64,0.010658133029937743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,64,0.022434133291244506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,65536,32,0.010156800349553425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,65536,32,0.02257706721623739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,2048,0.10162453651428223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,1536,0.07685440381368001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,65536,0.41956052780151365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,65536,0.7955466588338216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,1024,0.05252586603164673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,16384,0.1909002621968587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,16384,0.13614826202392577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,768,0.045533867677052815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,65536,0.7071776072184245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,12288,0.1465834617614746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,12288,0.09582186539967855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,10240,0.0818389336268107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,10240,0.14187733332316083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,10240,0.1269482692082723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,8192,0.09970026810963949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,8192,0.06941226323445639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,8192,0.10551146666208903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,7168,0.06346240043640136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,256,0.029847466945648195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,7168,0.08778133392333984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,7168,0.09278079668680826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,6144,0.0774826685587565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,6144,0.05760746796925863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,5120,0.06549226840337118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,65536,128,0.026842667659123735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,6144,0.08050346374511719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,5120,0.05101226568222046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,4096,0.05384853283564249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,4096,0.04417920112609863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,3584,0.047363201777140304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,3584,0.04101226727167766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,3072,0.04246400197347005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,3072,0.03765226602554321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,2560,0.03634239832560222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,3072,0.04171093304951985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,2560,0.034050134817759196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,2048,0.02994133234024048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,2048,0.02975040078163147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,1536,0.023988266785939537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,1536,0.02610879937807719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,1536,0.023190399010976158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,1024,0.016901334126790367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,1024,0.022052266200383506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,16384,0.18935680389404297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,768,0.014597333470980325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,12288,0.147106138865153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,768,0.020866133769353232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,768,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,512,0.009835732976595561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,512,0.013107200463612875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,512,0.019374932845433554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,256,0.007158400118350982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,256,0.017333332697550455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,128,0.005205333232879639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,128,0.016500266393025716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,5120,0.06558080116907755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,64,0.004599466423193614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,4096,0.055233065287272134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,64,0.016977065801620485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,16384,32,0.005107200145721436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,3584,0.04952213366826375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,16384,32,0.017129600048065186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,2560,0.03660906553268432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,2048,0.03011946678161621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,16384,0.16503893534342448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,16384,0.0976469357808431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,65536,0.3530495961507162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,65536,0.5970975875854492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,12288,0.12976319789886476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,1024,0.017539199193318686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,12288,0.08150826295216879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,10240,0.09498559633890788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,10240,0.06725227038065593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,8192,0.0761578639348348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,10240,0.11003946463267009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,8192,0.05698133309682211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,7168,0.06835947036743165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,7168,0.05218773285547892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,6144,0.06012266476949056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,6144,0.047635201613108316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,256,0.01065066655476888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,5120,0.0510538657506307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,16384,128,0.009731200337409974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,5120,0.04312533140182495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,4096,0.041043198108673094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,4096,0.03724266688028972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,4096,0.04855786561965943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,3584,0.036415998140970865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,16384,0.16289493242899578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,65536,0.6239370981852214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,3584,0.03477973143259684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,12288,0.1293621301651001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,3072,0.03258133331934611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,3072,0.03208000063896179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,2560,0.027966932455698652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,8192,0.09527253309885661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,2560,0.028883200883865357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,7168,0.08129920164744059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,2048,0.023410133520762124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,2048,0.027352533737818402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,2048,0.02563733259836833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,1536,0.018503467241923012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,6144,0.07042240301767985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,1536,0.02318506638209025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,1024,0.012633599837621055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,1024,0.02080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,768,0.010541866223017376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,1024,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,768,0.01975040038426717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,768,0.013554132978121438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,512,0.008469333251317341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,512,0.01789439916610718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,256,0.00517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,256,0.016268799702326454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,5120,0.060515201091766356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,128,0.004287999868392944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,128,0.015897599856058757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,64,0.003988266736268997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,64,0.01601599951585134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,12288,32,0.004335999985535939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,12288,32,0.016245333353678386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,3584,0.04351786772410075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,3072,0.037662935256958005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,65536,0.5133066813151042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,2560,0.03209706743558248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,65536,0.3112607955932617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,16384,0.16759786605834961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,16384,0.08674240112304688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,1536,0.02109439969062805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,12288,0.09879573186238608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,12288,0.0701632022857666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,10240,0.0823263963063558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,10240,0.061293868223826084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,8192,0.0665663997332255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,512,0.010662399729092916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,8192,0.05184213320414225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,8192,0.08656960328420003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,7168,0.05943359931310018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,256,0.008752000331878663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,7168,0.04760853449503581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,6144,0.05210026502609253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,6144,0.04336213270823161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,5120,0.04455466667811076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,12288,128,0.0077674667040507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,6144,0.06628053188323975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,5120,0.039185067017873124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,4096,0.03515199820200603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,4096,0.034170667330423996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,3584,0.03231893380482991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,3584,0.031959466139475506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,16384,0.15390933354695638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,12288,0.1205024003982544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,3072,0.029899734258651733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,65536,0.5683253606160481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,3072,0.02912213404973348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,2560,0.027217066287994383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,2560,0.026660267512003583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,10240,0.10465280214945476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,2048,0.020499199628829956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,2048,0.024247467517852783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,1536,0.01566933294137319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,1536,0.02230506738026937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,7168,0.07855679988861083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,1024,0.011352533102035522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,1024,0.020300799608230592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,5120,0.05719893376032511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,768,0.00976746678352356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,4096,0.04577386776606242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,768,0.019127466281255088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,512,0.007911466558774312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,3584,0.04137386480967204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,512,0.017462400595347087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,512,0.0095360000928243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,256,0.005268266797065735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,256,0.01641813317934672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,128,0.004646400113900503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,128,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,3072,0.03472426732381185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,64,0.003991466760635376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,2560,0.03078826665878296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,64,0.01593706707159678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,2048,0.02600640058517456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,10240,32,0.004166399935881296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,10240,32,0.015917866428693136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,1536,0.01919999917348226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,16384,0.09776426951090494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,65536,0.24623680114746094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,65536,0.4150368054707845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,1024,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,16384,0.07244906425476075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,65536,0.36754347483317057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,12288,0.07579627037048339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,12288,0.058804265658060705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,768,0.012314666310946147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,10240,0.06392213503519693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,12288,0.0798858642578125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,10240,0.051680000623067227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,8192,0.05167680184046427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,10240,0.0673845370610555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,8192,0.04417386849721273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,7168,0.04574933449427287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,7168,0.04027093251546224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,6144,0.04085119962692261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,256,0.007584000130494435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,6144,0.03729706605275472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,5120,0.03450239896774292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,5120,0.03370773394902547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,10240,128,0.007122133175532024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,4096,0.02843093276023865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,4096,0.02979946732521057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,4096,0.02998720010121663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,3584,0.025501867135365803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,3584,0.027365332841873168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,3072,0.0225600004196167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,3072,0.025362133979797363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,16384,0.10168960094451904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,2560,0.019441066185633342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,2560,0.023921066522598268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,8192,0.05395946502685547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,2560,0.021236266692479452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,2048,0.016192000110944113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,1536,0.012563199798266093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,2048,0.021886932849884033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,7168,0.047627735137939456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,1536,0.02046826680501302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,6144,0.04257706801096599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,1024,0.009505066275596618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,1024,0.018481065829594932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,768,0.008083199958006541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,5120,0.03619840145111084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,768,0.017749333381652833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,512,0.006337066491444905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,512,0.016265599926312765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,512,0.008516266942024231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,256,0.004344533383846283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,256,0.015987199544906617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,256,0.007483733197053273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,128,0.0038794666528701783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,3584,0.026945066452026368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,128,0.015173332889874777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,128,0.0067893331249554946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,64,0.0036085332433382668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,8192,32,0.0038986665507157645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,64,0.01539413332939148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,3072,0.02355519930521647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,8192,32,0.016131200393040977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,16384,0.08661759694417318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,65536,0.23027413686116538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,16384,0.06613440116246541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,65536,0.3562901178995768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,12288,0.06774506568908692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,16384,0.09534186522165934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,12288,0.05924266576766968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,10240,0.056909867127736415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,12288,0.07380053202311197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,10240,0.047406931718190506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,8192,0.045741868019104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,10240,0.061887999375661217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,8192,0.04076799949010213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,7168,0.04134399890899658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,7168,0.038228265444437665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,7168,0.045501867930094406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,6144,0.03635306755701701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,6144,0.0347541332244873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,6144,0.038761599858601885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,5120,0.031143466631571453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,5120,0.03185919920603435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,4096,0.025198932488759356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,4096,0.027721599737803145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,2048,0.01808213392893473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,4096,0.027875200907389326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,1536,0.014755200346310934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,3584,0.02318506638209025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,3584,0.02585066755612691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,3072,0.020424532890319824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,1024,0.011655466755231221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,3072,0.024124799172083537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,8192,768,0.010181333621342976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,2560,0.017400532960891724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,2560,0.023110399643580117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,2560,0.01941653291384379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,2048,0.014193066954612732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,2048,0.021641600131988525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,2048,0.016321067015329996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,1536,0.011359999577204388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,1536,0.0200437327226003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,1024,0.00878613293170929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,1024,0.01798400084177653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,65536,0.3421354611714681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,768,0.008247466882069905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,768,0.016505600015322367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,8192,0.04989333152770996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,512,0.005658666789531708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,512,0.016223999857902526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,5120,0.03380053440729777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,256,0.004273066421349844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,256,0.01574186682701111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,3584,0.024999467531840007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,3072,0.022036266326904298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,128,0.0038293334345022834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,128,0.015675733486811318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,64,0.0036831999818483984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,64,0.01578986644744873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,7168,32,0.003803733239571253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,1536,0.013537066181500754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,7168,32,0.015731199582417806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,1024,0.010705066720644633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,65536,0.3083274523417155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,65536,0.20253119468688965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,16384,0.08982079823811849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,16384,0.06442453463872275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,12288,0.0679530700047811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,12288,0.05047893524169922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,10240,0.05887786547342936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,768,0.008847999572753906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,10240,0.04439786672592163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,10240,0.05890239874521891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,8192,0.04101973374684652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,8192,0.03815573453903198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,8192,0.04782293240229289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,7168,0.03669973214467366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,7168,0.03593920071919759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,512,0.007713066538174947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,7168,0.04140053192774455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,6144,0.032585599025090534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,6144,0.03286079963048299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,6144,0.03668479919433594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,5120,0.028011733293533327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,5120,0.02940479914347331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,4096,0.02299413283665975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,4096,0.025846399863560993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,4096,0.026624000072479247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,3584,0.0207914670308431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,256,0.006550399959087372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,3584,0.024873600403467814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,3584,0.023850667476654052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,3072,0.018478933970133463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,3072,0.023587199052174886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,3072,0.020709333817164104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,2560,0.01570026675860087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,7168,128,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,2560,0.02241920034090678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,2048,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,1536,0.010520533720652262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,2048,0.021397332350413002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,1536,0.019630932807922365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,1024,0.008353066444396973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,1024,0.01834133267402649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,1024,0.0105813334385554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,768,0.007122133175532024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,768,0.017492266496022542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,768,0.00897706647713979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,512,0.0051925331354141235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,16384,0.08915627002716064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,65536,0.31849492390950523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,512,0.016355199615160625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,512,0.00772159993648529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,256,0.0042922665675481165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,12288,0.07011626561482748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,256,0.015742933750152587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,128,0.0037685332198937735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,64,0.0034976000587145484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,128,0.015356799960136414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,6144,32,0.003643733263015747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,64,0.015494400262832641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,6144,32,0.01565439999103546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,65536,0.24872213999430337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,65536,0.17508266766866049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,16384,0.07384426593780517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,5120,0.030849067370096843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,16384,0.05888746579488119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,12288,0.05889706611633301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,12288,0.04617280165354411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,2560,0.01849173307418823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,2048,0.015624533096949259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,10240,0.04975786606470744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,10240,0.041332264741261796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,1536,0.012865066528320312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,8192,0.043323731422424315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,8192,0.03599146604537964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,7168,0.03295680085817973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,7168,0.03347200155258179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,256,0.006484266618887584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,6144,0.02895786762237549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,6144,128,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,6144,0.030358399947484332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,5120,0.02505813241004944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,5120,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,16384,0.08314560254414877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,65536,0.29679253896077473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,4096,0.022642133633295695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,4096,0.025106134017308553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,12288,0.06650133530298868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,4096,0.024614399671554564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,3584,0.01839146614074707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,3584,0.024115200837453207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,3584,0.02235306700070699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,3072,0.016124799847602844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,10240,0.05611093441645304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,3072,0.022637865940729775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,3072,0.01981653372446696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,2560,0.013798399766286214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,2560,0.022102399667104086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,2048,0.011532800396283467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,8192,0.04452586571375529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,2048,0.020307199160257975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,1536,0.009886933366457622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,1536,0.01926079988479614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,7168,0.038726401329040525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,1024,0.007901866734027863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,1024,0.016663466890652977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,768,0.006730666756629944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,768,0.016309332847595216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,6144,0.03500693241755168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,512,0.004911999901135763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,512,0.016242133577664693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,256,0.0042357335488001505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,256,0.015448533495267234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,256,0.006119466821352641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,128,0.0036778666079044344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,128,0.015221333503723145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,5120,0.028753066062927247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,64,0.0033887999753157297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,64,0.015363199512163797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,5120,32,0.003469866762558619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,5120,32,0.015383467078208923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,65536,0.20339412689208985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,2560,0.017781333128611247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,2048,0.014579199751218162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,65536,0.15026666323343912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,16384,0.05821866591771444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,65536,0.27160959243774413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,1536,0.012019200126330058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,16384,0.05307519833246867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,12288,0.04711360136667887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,1024,0.009346133470535279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,12288,0.04621973435084025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,10240,0.04105279843012492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,10240,0.03596906661987305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,10240,0.048791468143463135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,768,0.008117333551247915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,8192,0.03446293274561564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,8192,0.030618667602539062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,8192,0.03979413509368897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,7168,0.0335264007250468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,7168,0.02864426573117574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,7168,0.03502613306045532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,6144,0.029458133379618327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,6144,0.026371200879414875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,6144,0.031428267558415726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,5120,0.020385066668192543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,5120,0.0244704008102417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,4096,0.015873066584269204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,4096,0.022422399123509726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,512,0.006966400146484375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,3584,0.014261333147684732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,3584,0.021476266781489055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,3072,0.012565333644549051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,3072,0.020772266387939452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,2560,0.010748799641927083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,2560,0.019977599382400513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,5120,128,0.005445333321889242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,2048,0.009297066926956176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,2048,0.018702934185663857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,16384,0.07331733703613282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,1536,0.008137600123882293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,12288,0.057639467716217044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,1536,0.017199999094009398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,1024,0.0065301333864529925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,5120,0.02648426691691081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,1024,0.016570666432380678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,4096,0.022362667322158813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,768,0.0054175997773806255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,3584,0.02066453297932943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,768,0.01618666648864746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,512,0.004610133171081543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,3072,0.018026665846506754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,512,0.015825066963831583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,256,0.003751466671625773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,256,0.006461866696675618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,2560,0.016040533781051636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,256,0.015386666854222616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,128,0.003479466587305069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,128,0.015052800377209982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,64,0.0032245332996050516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,4096,32,0.0034485332667827605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,64,0.015918933351834617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,2048,0.013894400000572205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,4096,32,0.01556373337904612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,65536,0.17957332928975422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,65536,0.13823359807332355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,16384,0.0517632007598877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,16384,0.04984746774037679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,1536,0.01185706655184428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,12288,0.0422272006670634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,12288,0.04380160172780355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,1024,0.008916266759236654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,10240,0.03657066822052002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,768,0.008060800035794576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,10240,0.03391786813735962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,512,0.0071733335653940845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,8192,0.03096746603647868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,8192,0.02916906674702962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,7168,0.02604159911473592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,7168,0.027703466018040974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,4096,128,0.005825066566467285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,7168,0.03720106681187947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,6144,0.023871999979019166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,6144,0.025439999500910443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,6144,0.032228267192840575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,5120,0.01788053313891093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,5120,0.023971199989318848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,4096,0.014367999633153281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,4096,0.022194133202234904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,3584,0.012940800189971924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,3584,0.021270400285720824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,65536,0.2701237360636393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,3072,0.012811733285586038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,16384,0.08007253011067708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,3072,0.020230400562286376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,2560,0.010160000125567118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,2560,0.01966080069541931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,2560,0.016344533363978068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,2048,0.008713600039482117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,2048,0.018369066715240478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,12288,0.05975786844889323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,1536,0.0074986666440963745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,1536,0.016990933815638223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,10240,0.05103040138880412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,1024,0.005475200215975444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,8192,0.040411734580993654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,1024,0.016243199507395424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,768,0.004524800181388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,768,0.01590933303038279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,5120,0.027670399347941084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,512,0.004009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,4096,0.023689599831899007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,512,0.015454933047294617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,256,0.003575466573238373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,256,0.015369600057601929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,3584,0.02081813414891561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,256,0.005850666761398315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,128,0.003323733309904734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,128,0.01520639955997467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,3072,0.018222934007644652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,64,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,64,0.015387733777364096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3584,32,0.003373866776625315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3584,32,0.015236266454060874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,65536,0.15770773887634276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,65536,0.12858453591664631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,2048,0.013571199774742127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,16384,0.05006826718648275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,16384,0.046292265256245926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,12288,0.03760746717453003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,12288,0.03898880084355672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,1536,0.010963199536005656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,10240,0.03294720053672791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,10240,0.03618239959081014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,1024,0.008624000350634257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,8192,0.027662932872772217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,768,0.007420800129572551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,8192,0.031093333164850873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,7168,0.025931733846664428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,7168,0.02637653350830078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,7168,0.03508479992548625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,6144,0.023753599325815836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,512,0.006584533552328746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,6144,0.024233599503835045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,5120,0.016786134243011473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,5120,0.023057067394256593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3584,128,0.005305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,4096,0.013978667060534158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,4096,0.021406932671864828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,65536,0.2609130700429281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,16384,0.07673280239105225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,3584,0.013358933726946512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,3584,0.020779732863108316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,3072,0.010779733459154766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,12288,0.05833386580149332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,3072,0.01999893387158712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,3072,0.017571200927098594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,2560,0.009566932916641235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,2560,0.019063466787338258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,2048,0.008294400076071422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,10240,0.050137599309285484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,2048,0.018687999248504637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,1536,0.0071936001380284624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,8192,0.03901653289794922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,1536,0.016371200482050575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,6144,0.03178453246752421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,1024,0.005201066533724466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,1024,0.015853866934776306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,5120,0.02629973292350769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,768,0.004496000210444133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,768,0.016420267025629678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,4096,0.022130133708318074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,512,0.004033066580692927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,512,0.015382400155067444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,3584,0.02037866711616516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,256,0.003549866626660029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,256,0.015037866433461508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,2560,0.015965867042541503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,128,0.0032661333680152893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,128,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,2048,0.013079466422398886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,64,0.003269333392381668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,64,0.01511146624883016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,1536,0.010487467050552368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,3072,32,0.0032543999453385672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,3072,32,0.015310933192571005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,65536,0.11293973128000896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,65536,0.13611520131429036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,16384,0.04008426666259766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,65536,0.25267093976338706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,16384,0.04261759916941325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,16384,0.07497279644012451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,12288,0.03264320095380147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,12288,0.03599466482798259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,1024,0.00848640004793803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,10240,0.028864000240961713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,10240,0.03234453399976094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,8192,0.02438933253288269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,8192,0.029224532842636108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,8192,0.038985598087310794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,7168,0.02366080085436503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,7168,0.028034132719039918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,7168,0.033763198057810466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,768,0.007180800040562947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,6144,0.02067413330078125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,5120,0.01858560045560201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,6144,0.0256117324034373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,6144,0.0305184006690979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,5120,0.022072533766428627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,5120,0.025297067562739056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,4096,0.015125333269437154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,512,0.0065184002121289565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,4096,0.021757866938908896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,3584,0.011473066608111064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,3584,0.020188800493876138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,3072,0.010798933108647664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,3072,0.019317332903544107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,2560,0.008943999807039898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,2560,0.017869865894317626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,2560,0.015317333738009134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,256,0.005676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,2048,0.0077450667818387345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,2048,0.017194666465123496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,1536,0.006820266445477803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,1536,0.016639999548594155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,3072,128,0.005161599814891815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,1024,0.0050005331635475155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,1024,0.016306133071581522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,12288,0.05754453341166178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,768,0.004520533482233683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,10240,0.049998935063680014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,768,0.016054399808247886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,512,0.004031999905904134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,512,0.01646080017089844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,4096,0.021699200073877968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,256,0.0035584000249703727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,256,0.015020799636840821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,3584,0.019989333550135293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,256,0.005684266487757364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,3072,0.017244799931844076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,128,0.0032543999453385672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,128,0.014841600259145101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,64,0.0030165334542592366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,64,0.014913066228230795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2560,32,0.0030794667700926462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2560,32,0.015032533804575601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,2048,0.012612266341845193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,65536,0.10703999996185302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,65536,0.10086507002512615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,16384,0.033779199918111166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,16384,0.03858773310979207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,1536,0.010478933652242024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,12288,0.02759786645571391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,12288,0.03498880068461101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,1024,0.008238933483759562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,10240,0.024210133155186973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,10240,0.031220267216364544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,8192,0.020041600863138834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,8192,0.027688533067703247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,768,0.00721919983625412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,7168,0.021511467297871907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,7168,0.026202666759490966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,512,0.006260266900062561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2560,128,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,6144,0.018568533658981323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,6144,0.024500266710917155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,65536,0.22659734090169273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,16384,0.06349440018335978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,12288,0.04994026819864909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,5120,0.01625920037428538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,5120,0.022712532679239908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,10240,0.04250986576080322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,4096,0.01341759959856669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,4096,0.02072746753692627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,3584,0.012314666310946147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,3584,0.020396800835927327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,3072,0.011188266674677531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,8192,0.035947732130686444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,3072,0.019287467002868652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,2560,0.008542933066685994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,2560,0.017391999562581383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,7168,0.03137279947598775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,2048,0.007727999985218048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,2048,0.01732906699180603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,1536,0.00631039987007777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,1536,0.01634239951769511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,6144,0.026727465788523357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,1024,0.004650666813055674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,1024,0.015794133146603904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,5120,0.021997867027918498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,768,0.004296533266703288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,4096,0.01919680039087931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,768,0.015871999661127727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,512,0.003832533210515976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,3584,0.01798400084177653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,512,0.015383467078208923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,256,0.0034400001168251038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,3072,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,256,0.015043200055758158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,256,0.005724800129731497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,128,0.0031946666538715364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,128,0.014782933394114175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,2560,0.013736533125241599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,64,0.003047466774781545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,2048,32,0.0031968000034491217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,64,0.01495573321978251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,2048,32,0.015075199802716575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,65536,0.08888533115386962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,65536,0.09368746280670166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,16384,0.027569067478179932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,2048,0.011353600025177001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,16384,0.03998719851175944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,16384,0.06895679632822672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,12288,0.02241920034090678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,1536,0.010013866424560546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,12288,0.03250666658083598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,10240,0.025779199600219727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,10240,0.02956266601880391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,8192,0.021406932671864828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,8192,0.02646080056826274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,1024,0.007964799801508587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,7168,0.01888426740964254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,7168,0.025330134232838947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,768,0.007118933399518331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,6144,0.016950400670369466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,512,0.006313600142796834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,6144,0.024547199408213295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,6144,0.028845866521199543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,5120,0.014966400464375815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,5120,0.021154133478800456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,2048,128,0.005118933320045471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,4096,0.01249066690603892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,4096,0.019784533977508546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,3584,0.011374933520952861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,3584,0.019092265764872232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,3584,0.018411733706792197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,3072,0.010163199901580811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,3072,0.018181333939234413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,65536,0.23053654034932455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,2560,0.007392000158627827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,2560,0.01762346625328064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,12288,0.053725866476694736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,2048,0.006714666883150737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,10240,0.04562453428904216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,2048,0.01744426687558492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,1536,0.005338666836420695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,1536,0.016316800316174825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,8192,0.03824959993362427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,1536,0.009965866804122925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,1024,0.004448000093301137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,1024,0.015868799885114034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,768,0.004127999891837438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,768,0.01602240006128947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,7168,0.03243199984232585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,512,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,512,0.015149866541226705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,5120,0.024924800793329874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,256,0.0033791999022165934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,4096,0.020823466777801513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,256,0.014769066373507181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,128,0.003102933367093404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,128,0.01479573349157969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,128,0.004886400202910105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,3072,0.015915733575820924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,64,0.003272533416748047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,64,0.014811733365058899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1536,32,0.003092266619205475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1536,32,0.014986667037010192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,2560,0.014286933342615762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,65536,0.061135999361673986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,65536,0.07684480349222819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,2048,0.011827199657758077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,16384,0.02038080096244812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,16384,0.03213760058085124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,16384,0.05863680044809977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,12288,0.018637865781784058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,12288,0.027924267450968425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,1024,0.008042666812737782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,10240,0.018331732352574667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,10240,0.02617173393567403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,8192,0.015118933717409768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,768,0.0069909334182739254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,8192,0.02382826606432597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,7168,0.013819733262062072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,7168,0.02293973366419474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,512,0.006232533355553945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,6144,0.012745599945386252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,6144,0.02186453342437744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1536,256,0.005335466563701629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,5120,0.011222400267918905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,5120,0.020965333779652914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,4096,0.00962453285853068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,65536,0.20947200457255044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,4096,0.01867413322130839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,4096,0.018013866742451985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,3584,0.00876693328221639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,12288,0.04628693262736003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,3584,0.018897066513697304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,3584,0.016603733102480568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,3072,0.0083146666487058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,3072,0.018332799275716148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,2560,0.00728000005086263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,10240,0.03928320010503133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,2560,0.017605332533518474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,8192,0.034423466523488364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,2048,0.006208000083764395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,2048,0.01713706652323405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,1536,0.005427200098832448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,1536,0.01623679995536804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,7168,0.028964267174402876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,1024,0.004518400132656098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,1024,0.01594986617565155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,1024,0.007582933207352956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,768,0.004053333401679992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,768,0.015736533204714458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,768,0.006804266571998596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,512,0.0036447999378045404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,512,0.01569066643714905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,6144,0.02498133381207784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,256,0.003252266595760981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,256,0.005430399874846141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,256,0.014776532848676046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,128,0.0030037333567937215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,128,0.01470186710357666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,5120,0.020984532435735066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,64,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,64,0.014569600423177084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,1024,32,0.0028746667007605235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,1024,32,0.014656000336011252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,65536,0.05026880105336508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,65536,0.0702069362004598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,16384,0.019927465915679933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,65536,0.1776810646057129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,16384,0.028735999266306562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,12288,0.017656532923380534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,12288,0.027345067262649535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,3072,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,2560,0.013160533706347146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,10240,0.01442026694615682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,10240,0.025044266382853193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,10240,0.03372160196304321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,2048,0.010999466975529988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,8192,0.012427733341852824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,8192,0.02318399945894877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,7168,0.012001066406567892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,1536,0.009336533149083455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,7168,0.02291626731554667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,6144,0.012008532881736755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,6144,0.021667200326919555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,5120,0.010529067118962605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,512,0.006098133325576782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,5120,0.01972800095876058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,4096,0.009048533439636231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,4096,0.01888426740964254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,1024,128,0.004855466882387797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,3584,0.008322133123874665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,3584,0.01839146614074707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,3584,0.014533332983652749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,3072,0.007349333167076111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,3072,0.01801066597302755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,16384,0.05018026828765869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,2560,0.006471466521422069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,2560,0.017170133193333943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,12288,0.03938133319218953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,8192,0.028797866900761922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,7168,0.024684800704320272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,2048,0.005713066458702088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,2048,0.01665493349234263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,1536,0.004995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,6144,0.021526400248209634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,1536,0.01616853376229604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,1024,0.004274133344491323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,5120,0.018438400824864705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,1024,0.015562666455904641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,768,0.003964799890915553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,768,0.015635200341542563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,4096,0.015494400262832641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,512,0.003533866753180822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,512,0.015251200397809347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,3072,0.012361600001653036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,256,0.0032373333970705668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,256,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,2560,0.011199999849001567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,128,0.002994133283694585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,128,0.014706133802731832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,128,0.004941866795221964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,64,0.0028778667251269023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,768,32,0.0028490667541821797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,64,0.014665599664052328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,768,32,0.01460693379243215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,65536,0.038629333178202316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,65536,0.06442346572875976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,65536,0.14296852747599284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,16384,0.014569600423177084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,16384,0.027663999795913698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,16384,0.04073066711425781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,12288,0.012777599692344665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,12288,0.02400426665941874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,2048,0.009666132926940917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,10240,0.011867733796437581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,10240,0.022822399934132896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,1536,0.008497066299120585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,8192,0.011080533266067505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,8192,0.02034133275349935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,8192,0.021976532538731892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,1024,0.00695360004901886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,7168,0.010427733262379963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,7168,0.020802134275436403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,7168,0.01834986607233683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,6144,0.009662933150927226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,768,0.006258133550484974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,6144,0.019977599382400513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,6144,0.017383466164271034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,5120,0.008700799942016602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,5120,0.019426133235295615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,4096,0.007650133470694225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,4096,0.019002666076024376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,512,0.005635199944178263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,3584,0.007285333176453908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,3584,0.018157867590586345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,3072,0.006925866504510244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,3072,0.01817173361778259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,2560,0.006237866481145223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,2560,0.017232000827789307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,2560,0.00925439993540446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,768,256,0.005339733262856802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,2048,0.005490133166313171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,2048,0.016714666287104288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,2048,0.008096000055472057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,1536,0.004863999783992767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,1536,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,1024,0.004149333387613296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,1024,0.015927466750144958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,1024,0.006205866734186808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,768,0.0038762666285037995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,768,0.015476266543070475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,12288,0.031863466898600264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,512,0.0035605333745479585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,512,0.015389866630236306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,10240,0.027266132831573486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,256,0.0031669333577156065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,256,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,256,0.005004799862702688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,128,0.002985599885384242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,128,0.01463573376337687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,128,0.004858666658401489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,64,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,4096,0.012526933352152506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,5120,0.014691199858983359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,512,32,0.002791466563940048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,64,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,3584,0.011617066462834676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,65536,0.029406932989756267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,512,32,0.014432000120480857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,3072,0.009567999839782714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,16384,0.009457066655158997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,16384,0.02376746733983358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,65536,0.058696532249450685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,16384,0.026760532458623247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,12288,0.009363200267155964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,12288,0.021511467297871907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,10240,0.008643200000127155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,8192,0.007427200178305308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,10240,0.02095573345820109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,8192,0.020439465840657554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,8192,0.01530880033969879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,7168,0.006818133095900218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,1536,0.00730560024579366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,7168,0.019985065857569376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,6144,0.006587733328342438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,6144,0.01996799906094869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,6144,0.012534399827321371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,5120,0.00747626672188441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,5120,0.0192522664864858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,768,0.005798399945100148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,4096,0.0067669332027435304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,4096,0.018745599190394084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,512,512,0.005430399874846141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,3584,0.006657066444555919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,3584,0.018224000930786133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,65536,0.09821013609568277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,3072,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,3072,0.01746986707051595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,12288,0.02143146594365438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,2560,0.006105599800745646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,10240,0.018310399850209554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,2560,0.017249067624409996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,2560,0.008235733211040496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,2048,0.00544106662273407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,2048,0.016570666432380678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,1536,0.004744533201058706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,1536,0.016515200336774193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,7168,0.013809067010879517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,1024,0.004175999760627746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,1024,0.005959466596444448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,1024,0.015476266543070475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,768,0.0038058665891488397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,768,0.015470932920773825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,768,0.005544533332188925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,512,0.003385599950949351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,512,0.014971733093261719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,5120,0.011314133803049724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,256,0.0031146667897701263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,256,0.014657066265741984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,256,0.00483840008576711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,4096,0.010089600086212158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,128,0.002845866729815801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,128,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,64,0.002775466690460841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,256,32,0.0027744000156720476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,64,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,256,32,0.014467199643452963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,65536,0.016289066274960837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,65536,0.05092266798019409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,65536,0.09994346300760905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,3584,0.00967680017153422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,16384,0.008132266501585644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,16384,0.022798933585484824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,16384,0.026345600684483845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,12288,0.007387733459472657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,10240,0.0067775999506314594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,12288,0.021001599232355752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,12288,0.02082560062408447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,10240,0.020653865734736123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,8192,0.006714666883150737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,8192,0.019697066148122153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,7168,0.0064629331231117245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,7168,0.01981333295504252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,3072,0.008597333232561748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,6144,0.006129066646099091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,6144,0.019106133778889974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,2048,0.0071722666422526045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,1536,0.006876799960931141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,5120,0.006621866424878438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,5120,0.019581866264343262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,5120,0.011382399996121725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,4096,0.006171733140945435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,4096,0.01844053268432617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,3584,0.006534400085608165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,3584,0.01800959904988607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,3584,0.00957973301410675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,512,0.005226666728655497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,3072,0.006251733501752217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,3072,0.00843519965807597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,3072,0.017641599973042807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,2560,0.006001066664854685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,2560,0.0169706662495931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,256,128,0.004779733220736186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,2048,0.0053610667586326596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,2048,0.007113599777221679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,2048,0.016365866859753928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,1536,0.004635733366012573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,1536,0.016217600305875143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,1536,0.0068896000583966565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,1024,0.0040522667268912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,1024,0.015191466609636942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,768,0.003697066754102707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,10240,0.017933867375055947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,768,0.015477333466211954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,512,0.003399466723203659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,768,0.005566933254400889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,512,0.01490239997704824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,8192,0.015240533153216043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,512,0.005220266679922739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,256,0.0030303999781608583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,256,0.014922666549682616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,7168,0.01383680005868276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,128,0.002880000074704488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,128,0.004717866579691568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,128,0.014383999506632486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,64,0.002735999971628189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,64,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,128,32,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,128,32,0.014525866508483887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,65536,0.010981333255767823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,16384,0.006728533407052357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,65536,0.046996267636617024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,6144,0.012497066458066305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,12288,0.006443733473618825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,16384,0.02246933380762736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,12288,0.020696532726287842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,10240,0.006503466765085857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,10240,0.020862932999928793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,8192,0.006289066871007283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,8192,0.01961173415184021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,7168,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,4096,0.009983999530474345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,6144,0.006026666859785716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,5120,0.00650133341550827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,7168,0.020146133502324422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,6144,0.019001599152882895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,4096,0.0060810665289560955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,5120,0.019232000907262167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,3584,0.006494933366775512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,4096,0.018288000424702962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,3072,0.0061258668700853985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,3584,0.0179967999458313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,2560,0.006002133091290792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,3072,0.017905066410700478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,2560,0.016812799374262492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,2048,0.005358933409055074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,2560,0.008121599753697712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,2048,0.016775466998418174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,1536,0.004724266628424326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,1536,0.015983999768892924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,1024,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,768,0.0036159999668598174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,1024,0.015546666582425437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,256,0.0030080000559488933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,512,0.003390933324893316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,768,0.015542399883270264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,512,0.015330132842063905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,128,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,256,0.014517333110173544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,128,0.014545067151387533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,32,0.002609066665172577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,64,64,0.0027295999228954316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,1024,0.005948799848556519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,64,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,64,32,0.01425386667251587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,65536,0.010487467050552368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,65536,0.04586986700693767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,16384,0.0063509335120519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,12288,0.00643093337615331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,16384,0.023100799322128295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,12288,0.020615466435750327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,10240,0.006437333424886067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,7168,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,10240,0.021091200908025107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,8192,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,160,128,256,0.004961066444714864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,8192,0.019989333550135293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,7168,0.019884800910949706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,6144,0.0059338668982187905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,6144,0.01927466591199239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,5120,0.006276266773541768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,3584,0.01834026575088501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,4096,0.00586346685886383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,5120,0.019341866175333657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,4096,0.018531199296315512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,3584,0.006375466783841451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,3072,0.006049066781997681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,3072,0.01758506695429484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,2560,0.0059690664211908976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,2560,0.01697493394215902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,2048,0.005364266534646353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,1536,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,2048,0.017079466581344606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,1536,0.01598186691602071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,1024,0.003942399968703588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,1024,0.015577600399653117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,768,0.003568000098069509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,512,0.0033834666013717652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,768,0.015532799561818442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,512,0.015079466501871744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,256,0.0030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,256,0.014600533246994018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,128,0.0027935999135176343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,128,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,32,0.014526933431625366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,64,0.0026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,160,32,32,0.002656000107526779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,160,32,64,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,12288,0.3152736028035482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,16384,0.41111148198445635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,10240,0.49010346730550125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,12288,0.5825087865193684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,16384,0.7673632303873699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,10240,0.23995733261108398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,10240,0.2657866636912028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,8192,0.2281269391377767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,8192,0.3915818532307943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,8192,0.2237781365712484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,7168,0.19320747057596843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,7168,0.1731594721476237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,7168,0.34377279281616213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,6144,0.16939306259155273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,6144,0.29919681549072263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,5120,0.12894186973571778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,5120,0.24974506696065268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,5120,0.14402027130126954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,4096,0.20295893351236977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,4096,0.12095680236816406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,3584,0.10921173095703125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,3584,0.1782378673553467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,3072,0.15479040145874023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,3072,0.09647573630015055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,3072,0.08348053296407064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,2560,0.13003520170847577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,2560,0.08446826934814453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,12288,0.28472747802734377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,2048,0.10670506954193115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,2048,0.0719871997833252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,16384,0.3763423919677734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,2048,0.06573760112126668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,1536,0.0828010638554891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,1536,0.059596800804138185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,1024,0.05865386724472046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,1024,0.04640640020370483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,1024,0.03579733371734619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,768,0.045994667212168376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,768,0.03991039991378784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,512,0.03394986788431804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,512,0.03204693396886189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,512,0.024613332748413087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,256,0.019935999313990274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,256,0.024657066663106283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,256,0.018708266814549766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,128,0.011777066191037496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,128,0.021894399325052896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,64,0.012898133198420206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,64,0.02089280088742574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,65536,32,0.013936000068982443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,6144,0.1511210600535075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,65536,32,0.02091946601867676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,4096,0.10582506656646729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,65536,0.4134378751118978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,3584,0.09592106342315673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,65536,0.7848320007324219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,16384,0.19242026011149088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,16384,0.11814186573028565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,16384,0.11941546599070232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,12288,0.14657920201619465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,12288,0.09346240361531576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,2560,0.07205973466237386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,10240,0.08054933547973633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,12288,0.08077440261840821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,10240,0.12347839673360188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,10240,0.06749119758605956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,8192,0.0994271993637085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,8192,0.06907947063446045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,7168,0.08717013200124105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,1536,0.05301119883855184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,8192,0.05598186651865641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,7168,0.0622101346651713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,6144,0.07660160064697266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,6144,0.05574933290481567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,768,0.03237013419469197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,5120,0.06467093229293823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,5120,0.04949653148651123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,4096,0.05295466581980387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,4096,0.043323731422424315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,4096,0.032758400837580366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,3584,0.0468394676844279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,65536,128,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,3072,0.04131093422571818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,3584,0.040439466635386154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,3072,0.026692267258961993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,3072,0.03652906815210978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,2560,0.03534719944000244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,2560,0.03337920109430949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,2560,0.02327466607093811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,2048,0.029329067468643187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,2048,0.029267199834187824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,2048,0.01997973322868347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,1536,0.023382399479548135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,1536,0.025310933589935303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,1024,0.018467199802398682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,1024,0.021951999266942343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,768,0.012619733810424805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,768,0.020631466309229532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,65536,0.3811317443847656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,512,0.01055680016676585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,6144,0.04478400150934855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,5120,0.038646399974823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,512,0.01914026737213135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,512,0.010009599725405376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,256,0.006775466601053874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,256,0.016361600160598753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,128,0.0046634669105211895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,3584,0.029781333605448407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,128,0.015948800245920818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,64,0.004322133461634318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,128,0.007214933137098948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,16384,32,0.0047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,64,0.016506666938463845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,16384,32,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,1536,0.01625706652800242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,768,0.011829333504041036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,65536,0.3396341323852539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,7168,0.05035733381907145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,65536,0.5837856292724609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,16384,0.14241387049357096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,16384,0.09219093322753906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,12288,0.0741482655207316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,12288,0.12684693336486816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,10240,0.0919327974319458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,10240,0.06417066653569539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,10240,0.05336320002873739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,8192,0.07387519677480062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,8192,0.05537279844284058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,1024,0.012935466567675271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,7168,0.06535786787668864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,7168,0.05089813470840454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,6144,0.057309865951538086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,6144,0.0460426648457845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,5120,0.048733866214752196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,5120,0.040590933958689374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,5120,0.030469334125518797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,16384,256,0.007416533430417378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,4096,0.0400607983271281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,4096,0.025975465774536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,4096,0.03577386538187663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,3584,0.03556160132090251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,3584,0.03354453245798747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,3584,0.023375999927520753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,3072,0.0314794659614563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,3072,0.030910933017730714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,3072,0.020992000897725425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,2560,0.026894932985305785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,2560,0.027955200274785357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,2048,0.022402133544286093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,2048,0.025156267484029132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,65536,0.29585065841674807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,1536,0.02003306746482849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,16384,0.08141333262125651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,1536,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,1536,0.022267733017603555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,12288,0.062554665406545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,1024,0.012277332941691081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,1024,0.020197333892186482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,768,0.010059733192125957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,768,0.019080533583958944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,768,0.010317867000897724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,512,0.007750399907430013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,8192,0.04424639940261841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,512,0.007130666573842366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,512,0.017002665996551515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,256,0.004886400202910105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,7168,0.03959999879201253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,256,0.015879467129707336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,256,0.006479999919732411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,128,0.004108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,128,0.01565120021502177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,128,0.006075733403364817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,64,0.003937066594759623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,6144,0.03532480001449585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,64,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,12288,32,0.004440533121426901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,12288,32,0.01590826710065206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,16384,0.12059520085652668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,16384,0.08128853638966879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,12288,0.09261013666788737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,65536,0.3036693255106608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,65536,0.49523305892944336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,12288,0.06577599843343099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,10240,0.09075626532236734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,10240,0.05751039981842041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,2560,0.018461867173512777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,8192,0.06300479968388875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,8192,0.04918826818466186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,2048,0.015553067127863566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,7168,0.05568106571833292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,7168,0.04506133397420247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,7168,0.03420693476994832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,6144,0.04908693234125773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,6144,0.04118293523788452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,5120,0.04165973265965779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,5120,0.0370197335879008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,5120,0.026430932680765788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,4096,0.03414186636606852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,12288,1024,0.011307733257611592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,4096,0.032494932413101196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,4096,0.02253119945526123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,3584,0.030433066685994464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,3584,0.03067626754442851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,16384,0.07028906345367432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,65536,0.25467947324117024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,3072,0.026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,12288,0.05407893260320028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,3072,0.028266666332880656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,2560,0.023286400238672893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,2560,0.02540053327878316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,2560,0.015732266505559287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,2048,0.0191210667292277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,10240,0.04619199832280477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,2048,0.02364586591720581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,2048,0.013550933202107748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,1536,0.01490133305390676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,1536,0.021388800938924153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,8192,0.03811306556065877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,1536,0.011687466502189636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,1024,0.01064639985561371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,1024,0.019489065806070963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,1024,0.009985066453615824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,768,0.008949333429336548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,768,0.01838399966557821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,512,0.007039999961853028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,512,0.01625279982884725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,256,0.004735999802748362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,6144,0.030426667133967085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,256,0.015608533223470052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,128,0.00393599991997083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,128,0.015337600310643514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,64,0.003638399889071783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,64,0.015783466895421348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,10240,32,0.00412266676624616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,10240,32,0.01581760048866272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,3584,0.020430932442347206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,65536,0.4210207939147949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,65536,0.23848533630371094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,16384,0.12522239685058595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,16384,0.06924266815185547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,12288,0.07460052967071533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,12288,0.05648213227589925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,3072,0.017874133586883546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,10240,0.06288426717122396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,10240,0.049459199110666915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,768,0.008457600076993307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,8192,0.05087893406550089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,512,0.00653546651204427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,8192,0.04405333201090495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,256,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,10240,128,0.005525333185990652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,7168,0.04497919877370198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,7168,0.03973546822865804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,6144,0.039692799250284835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,6144,0.03638933499654134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,5120,0.03392639954884847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,5120,0.032833067576090495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,65536,0.21313813527425132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,4096,0.031667200724283855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,16384,0.058083200454711915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,4096,0.02899199922879537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,3584,0.02502826650937398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,3584,0.026837333043416338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,12288,0.045287466049194335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,3072,0.022080000241597494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,3072,0.0247978667418162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,10240,0.03870933453241984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,2560,0.018847999970118205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,2560,0.02334400018056234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,8192,0.03228586713473002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,2048,0.015343999862670899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,7168,0.028812799851099653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,2048,0.021793067455291748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,1536,0.011986133456230164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,6144,0.02590293288230896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,1536,0.020090667406717937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,1024,0.009097599983215332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,5120,0.022679466009140014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,1024,0.018822399775187175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,768,0.007729066908359528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,768,0.016523733735084534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,4096,0.019167999426523842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,512,0.006325333317120869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,512,0.016075733304023742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,3072,0.015490133563677469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,256,0.004045866678158442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,768,0.007063466807206471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,256,0.015602133671442666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,2048,0.012286933263142903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,128,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,1024,0.008175999919573466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,128,0.015212800105412802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,64,0.0034495999415715536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,3584,0.017860267559687296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,64,0.015435733397801719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,8192,32,0.0038293334345022834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,8192,32,0.015384533007939658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,65536,0.21279892921447754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,65536,0.36720107396443685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,2560,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,16384,0.09461759726206462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,16384,0.06744320392608642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,12288,0.07435200214385987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,12288,0.052546131610870364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,1536,0.011018666625022887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,10240,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,10240,0.046528001626332596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,512,0.006702933212121327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,8192,0.04509546756744385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,8192,0.040065066019694014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,128,0.0059008002281188965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,8192,256,0.006018133461475372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,7168,0.04024639924367269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,7168,0.03617493311564128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,6144,0.03584213256835937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,6144,0.03371413151423137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,5120,0.030376533667246502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,5120,0.03020906647046407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,12288,0.041801599661509196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,65536,0.1947711944580078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,4096,0.024572799603144325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,4096,0.026958932479222614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,3584,0.022064000368118286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,16384,0.053858133157094326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,3584,0.025319466988245647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,3072,0.01950506567955017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,3072,0.02339413364728292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,10240,0.036033066113789876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,2560,0.016793600718180337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,2560,0.02242986758550008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,6144,0.023616000016530355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,2048,0.0134634663661321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,2048,0.020895999670028687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,7168,0.02651519974072774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,1536,0.010825600226720173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,8192,0.029564799865086873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,1536,0.019153066476186118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,1024,0.008361599842707316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,1024,0.0175327996412913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,5120,0.020725333690643312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,768,0.007079466680685679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,768,0.015958399573961893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,3072,0.014219733079274497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,512,0.004971733192602793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,512,0.015852800011634825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,4096,0.01768959959348043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,256,0.0038399999340375268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,256,0.015293866395950317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,3584,0.015592533349990844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,128,0.0034101332227389016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,128,0.015212800105412802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,2560,0.013051733374595642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,64,0.003230933348337809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,64,0.01535146633783976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,7168,32,0.003619199991226196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,7168,32,0.015347199638684592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,65536,0.3232778549194336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,65536,0.20414719581604004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,1536,0.00988266666730245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,16384,0.08098346392313639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,16384,0.05766079823176066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,1024,0.0075914666056633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,12288,0.05702079931894938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,2048,0.011256532867749532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,12288,0.047193598747253415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,10240,0.04837439854939778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,10240,0.04153706630071004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,768,0.006664533416430156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,8192,0.03927679856618245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,8192,0.03701866865158081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,512,0.00622506688038508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,256,0.005693866809209188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,7168,0.034601600964864095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,7168,0.03372906843821208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,6144,0.031035733222961426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,7168,128,0.0055061335364977515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,6144,0.030634667476018267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,5120,0.0262880007425944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,5120,0.0277834673722585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,65536,0.17693012555440266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,4096,0.02447999914487203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,16384,0.04834773143132527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,4096,0.025139200687408447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,3584,0.019441066185633342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,3584,0.023689599831899007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,12288,0.03771946827570598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,3072,0.017114667097727458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,3072,0.022457599639892578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,10240,0.032281599442164105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,2560,0.014403200149536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,2560,0.021523199478785195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,6144,0.021579732497533165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,2048,0.011869866649309795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,7168,0.02408533294995626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,2048,0.020476800203323365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,1536,0.009674666325251262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,8192,0.026897066831588747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,1536,0.01884053349494934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,1024,0.007640533149242401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,1024,0.01593066652615865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,5120,0.01875093380610148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,3072,0.012925866246223449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,768,0.006489600241184235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,4096,0.015980799992879234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,768,0.01628266672293345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,512,0.004293333490689596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,512,0.015639467040697734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,256,0.0037290667494138084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,256,0.015289599696795145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,3584,0.014269866545995078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,128,0.0034058667719364167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,128,0.015180800358454385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,2560,0.011802666385968526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,1536,0.009202133615811665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,64,0.0032032000521818793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,6144,32,0.003585066646337509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,64,0.015209600329399109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,6144,32,0.01548693378766378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,2048,0.01048959990342458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,65536,0.26386879285176595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,65536,0.16850560506184895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,16384,0.07716693083445231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,16384,0.05192000071207682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,12288,0.05527466535568237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,12288,0.043594666322072345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,1024,0.0068906664848327635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,10240,0.047914667924245195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,10240,0.03771413167317708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,128,0.005223466455936432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,256,0.005508266886075338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,8192,0.033921066919962564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,512,0.005766400198141734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,8192,0.033326933781305954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,6144,768,0.0062389334042867025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,7168,0.03016746640205383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,7168,0.030694399277369184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,6144,0.02691413362820943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,6144,0.028243199984232588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,5120,0.022614399592081703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,5120,0.025614933172861738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,12288,0.034192001819610594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,4096,0.018532266219456993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,4096,0.023410133520762124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,65536,0.15944852828979492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,3584,0.017948800325393678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,16384,0.04404053290685018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,3584,0.022386133670806885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,3072,0.014631467064221701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,3072,0.021575466791788737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,10240,0.02976106603940328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,2560,0.012530133128166199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,2560,0.020890667041142782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,6144,0.01984213391939799
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,2048,0.010365866621335347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,2048,0.019793067375818887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,7168,0.0223146657148997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,1536,0.008703999718030294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,8192,0.024733867247899374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,1536,0.017663999398549398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,1024,0.006863999863465626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,1024,0.015896532932917276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,5120,0.01731626590092977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,768,0.005235200126965841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,768,0.016049066185951234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,3072,0.012097066640853882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,512,0.004041599979003271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,512,0.01564800043900808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,4096,0.014542933305104574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,256,0.0036586667100588477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,256,0.015188266833623251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,3584,0.013500799735387167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,128,0.003257599969704946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,2560,0.011348266402880352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,128,0.014803199966748556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,64,0.003125333289305369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,64,0.014885333180427552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,5120,32,0.003492266684770584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,5120,32,0.014881066481272378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,65536,0.20461759567260743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,65536,0.14322346051534016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,1536,0.008513066172599792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,16384,0.05901439984639486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,16384,0.04603093465169271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,2048,0.009889066219329834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,12288,0.04032426675160726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,1024,0.006724266707897187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,12288,0.039250131448109946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,768,0.006263466676076253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,10240,0.03426026503245036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,10240,0.03419839938481649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,8192,0.02789546648661296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,8192,0.030243200063705445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,256,0.005515733361244201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,512,0.005748266478379568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,7168,0.024897066752115886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,7168,0.02792746623357137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,6144,0.02254293362299601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,5120,128,0.005212800204753875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,6144,0.025890133778254193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,5120,0.01927893360455831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,65536,0.13701972961425782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,16384,0.037154134114583334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,5120,0.02396906614303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,4096,0.015794133146603904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,4096,0.022107734282811483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,3584,0.013834666212399802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,3584,0.021314134200414024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,12288,0.02866986592610677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,3072,0.012122666835784912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,3072,0.0204693337281545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,10240,0.024689066410064697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,2560,0.010662399729092916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,8192,0.02072640061378479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,2560,0.01992320020993551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,2048,0.009318400422732036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,2048,0.01917653282483419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,7168,0.018769067525863648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,1536,0.00782293329636256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,1536,0.01635199983914693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,6144,0.016695467631022136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,1024,0.006089599927266439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,1024,0.016475733121236166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,5120,0.014784000317255654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,768,0.004985600213209788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,768,0.015803733468055726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,4096,0.012914133071899415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,512,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,3072,0.010962133606274922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,512,0.015454933047294617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,256,0.003583999971548716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,3584,0.01214400033156077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,256,0.0150709331035614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,2048,0.009121066331863404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,128,0.0032597333192825317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,128,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,2560,0.010263466835021972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,1024,0.006567466755708058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,64,0.00322026660044988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,4096,32,0.003483733286460241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,64,0.015122133493423461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,4096,32,0.015214932958285013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,65536,0.17869332631429036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,65536,0.13098666667938233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,16384,0.05311040083567301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,16384,0.04396479924519857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,1536,0.007627733548482259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,12288,0.03638399839401245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,12288,0.03615466753641765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,10240,0.030431999762852983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,10240,0.0329749325911204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,768,0.0061471998691558834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,8192,0.025111466646194458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,8192,0.02792746623357137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,128,0.005128533144791921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,512,0.005694933235645294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,4096,256,0.0052373334765434265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,7168,0.02257706721623739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,7168,0.026187733809153242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,6144,0.020242132743199668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,6144,0.02436479926109314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,5120,0.016950400670369466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,5120,0.023458133141199745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,65536,0.1286517302195231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,4096,0.013817600409189858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,12288,0.027131734291712443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,16384,0.03508586486180623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,4096,0.021740800142288207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,3584,0.012541866302490235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,3584,0.021219199895858763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,3072,0.01156160036722819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,3072,0.02038080096244812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,10240,0.023194666703542074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,2560,0.010204799969991048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,2560,0.01949866612752279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,8192,0.019364267587661743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,2048,0.008994133273760477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,2048,0.018619734048843383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,7168,0.01764586567878723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,1536,0.007577600081761678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,1536,0.016819200913111367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,6144,0.01562879979610443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,1024,0.005645866692066193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,5120,0.01402239998181661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,1024,0.01609386702378591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,768,0.004799999793370565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,768,0.015875200430552162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,4096,0.012551466623942057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,512,0.004054400076468786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,512,0.0164490669965744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,3072,0.010685867071151734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,256,0.0035648000737031303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,256,0.015346133708953857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,3584,0.011727999647458394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,128,0.0034805332620938623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,128,0.014934399724006652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,2560,0.01001706620057424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,64,0.0032255999743938447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,64,0.01516800026098887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,2048,0.008745599786440532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3584,32,0.003219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,768,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3584,32,0.015013333161671957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,1024,0.006299733122189839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,1536,0.007419733206431071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,65536,0.15262932777404786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,65536,0.11817813714345296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,16384,0.04365013440450032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,512,0.0055861334005991616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,12288,0.03526826699574788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,16384,0.040370134512583415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,256,0.00553173323472341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,12288,0.03434346516927083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,10240,0.030880000193913775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,10240,0.0303872009118398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,8192,0.022563199202219643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,8192,0.026504532496134443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3584,128,0.00518506666024526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,7168,0.020475733280181884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,7168,0.02536746660868327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,6144,0.018117332458496095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,6144,0.024230400721232094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,5120,0.016724266608556113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,5120,0.02291626731554667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,65536,0.13034026622772216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,16384,0.03649173180262248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,4096,0.012365866700808208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,4096,0.02108586629231771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,12288,0.02763413389523824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,3584,0.011562666296958924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,3584,0.02061333258946737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,3072,0.010999466975529988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,10240,0.023732266823450723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,3072,0.0199178675810496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,2560,0.009431466460227966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,8192,0.019679999351501463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,2560,0.01874133348464966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,7168,0.017734400431315103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,2048,0.008123733103275299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,2048,0.016979199647903443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,6144,0.016110933820406594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,1536,0.007113599777221679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,1536,0.01681386629740397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,1024,0.005061333378156027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,1024,0.016169599692026772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,5120,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,768,0.00472320020198822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,768,0.015922133127848306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,4096,0.012665599584579468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,3072,0.010575999816258747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,512,0.004116266717513402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,512,0.01590720017751058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,256,0.003639466563860575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,256,0.015228799978892007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,3584,0.011556266744931539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,128,0.003134933362404505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,1536,0.007131733496983846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,128,0.015239466230074564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,64,0.0030261332790056865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,64,0.014990933736165366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,2560,0.00972160001595815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,3072,32,0.0032330666979153953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,3072,32,0.014588800072669984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,1024,0.006376533210277558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,2048,0.008254933357238769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,65536,0.10543893178304035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,65536,0.1311072031656901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,16384,0.03917760054270426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,16384,0.03892800013224284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,12288,0.031004800399144487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,12288,0.03170560002326965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,10240,0.027480532725652058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,10240,0.02815893292427063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,768,0.005982933441797892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,8192,0.022401066621144612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,8192,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,512,0.0055754666527112326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,256,0.005328000088532766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,7168,0.018180267016092936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,3072,128,0.004984533290068308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,7168,0.023944532871246337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,6144,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,6144,0.023245867093404135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,5120,0.013558399677276612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,5120,0.022124799092610677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,12288,0.026259199778238936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,65536,0.12809173266092938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,4096,0.011325866977373759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,16384,0.0347818652788798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,4096,0.020589866240819297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,10240,0.02300693392753601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,3584,0.010524800419807434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,3584,0.019846399625142418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,3072,0.009567999839782714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,3072,0.019375999768575035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,2560,0.00851093331972758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,2560,0.018663465976715088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,8192,0.018948266903559365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,2048,0.007738666733105977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,2048,0.017126399278640746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,6144,0.015667200088500977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,7168,0.017443199952443443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,1536,0.006411733229955037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,1536,0.016570666432380678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,5120,0.013964800039927163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,1024,0.004731733103593191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,1024,0.016126933693885803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,768,0.004324266811211904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,4096,0.01239359974861145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,768,0.01553813318411509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,3072,0.010077866911888122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,512,0.003833599885304769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,512,0.015518933534622192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,256,0.003416533271471659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,256,0.01530026694138845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,2560,0.0093450665473938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,3584,0.011249066392580668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,128,0.0031871999303499854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,128,0.014578133821487427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,64,0.0029567999144395193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,64,0.014756266276041666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2560,32,0.0030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2560,32,0.014834133783976236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,65536,0.10717333157857259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,65536,0.09397973219553629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,2048,0.008003200093905132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,1536,0.0070720002055168155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,16384,0.03294933239618937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,16384,0.034721068541208905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,12288,0.02542080084482829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,12288,0.03025706609090169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,1024,0.006391466657320659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,10240,0.022431999444961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,10240,0.028705066442489623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,256,0.005277866621812185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,768,0.005985066791375478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,512,0.005628799895445505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,8192,0.018210132916768394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,8192,0.025283199548721314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,7168,0.01621119976043701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2560,128,0.005095466474692027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,7168,0.024286933739980063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,6144,0.015688533584276833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,6144,0.023242666323979696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,5120,0.013671466708183288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,5120,0.020974934101104736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,65536,0.1043936014175415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,4096,0.011457066734631855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,4096,0.02007360061009725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,16384,0.028467200199762982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,3584,0.010195199648539226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,3584,0.01911999980608622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,12288,0.022120533386866252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,3072,0.009524266918500264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,3072,0.01800533334414164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,10240,0.019373865922292073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,2560,0.007787733276685078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,2560,0.017873066663742065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,8192,0.016242133577664693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,6144,0.013993600010871887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,7168,0.015301332871119181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,2048,0.006894933183987935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,5120,0.012474667032559712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,2048,0.016988799969355265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,1536,0.005679999788602194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,1536,0.01634880006313324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,1024,0.004584533472855886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,1024,0.016323199868202208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,768,0.0040832000474135075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,768,0.01599679986635844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,4096,0.011170132954915365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,512,0.0036298667391141256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,512,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,3072,0.008803199728329976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,3584,0.010506666700045268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,256,0.0033130665620168054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,256,0.014747732877731323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,128,0.0030293333033720653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,128,0.014641066392262777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,2560,0.0083146666487058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,64,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,2048,0.0074314668774604796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,64,0.014550399780273438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,2048,32,0.0029504001140594482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,1024,0.006106666723887126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,768,0.0057546665271123254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,2048,32,0.014670933286348978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,65536,0.08399893442789713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,1536,0.006997333467006683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,65536,0.07992746829986572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,16384,0.025534933805465697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,16384,0.03131519953409831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,12288,0.020626133680343627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,12288,0.02773546576499939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,10240,0.01786880095799764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,10240,0.025270400444666545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,512,0.005387733379999796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,8192,0.014386133352915446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,8192,0.02376746733983358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,256,0.005157333115736643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,7168,0.01333440045515696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,7168,0.022551467021306358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,2048,128,0.004958933095137278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,6144,0.012506666779518127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,6144,0.021921066443125407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,65536,0.09970453580220541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,5120,0.011169067025184632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,5120,0.02071146567662557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,16384,0.02802986701329549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,4096,0.009598933657010396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,4096,0.018716800212860107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,3584,0.008973866701126099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,12288,0.021622399489084877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,3584,0.018439465761184694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,3072,0.008257066706816356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,3072,0.01727893352508545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,10240,0.018778665860493978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,2560,0.0072970668474833175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,2560,0.01759679913520813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,8192,0.01597546637058258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,2048,0.006487466891606649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,2048,0.016701867183049522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,7168,0.014755200346310934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,1536,0.005338666836420695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,6144,0.013544533650080362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,1536,0.016370133558909098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,1024,0.004490666588147481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,1024,0.015727999806404113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,5120,0.012121599912643433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,768,0.004107733319203059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,4096,0.010269866387049357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,768,0.01572266618410746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,3072,0.008628267049789428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,512,0.0036949334045251214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,512,0.015121066570281982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,3584,0.009566932916641235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,256,0.0033141332368055976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,256,0.014892799655596414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,2048,0.0072629332542419435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,2560,0.008148266871770223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,128,0.002997333308060964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,128,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,1536,0.006929066777229309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,64,0.0028543998797734577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,64,0.014477866888046264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1536,32,0.0029333333174387617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1536,32,0.015001599987347921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,65536,0.05846399863560995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,65536,0.06862613360087076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,1024,0.005936000247796377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,65536,0.09556480248769125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,16384,0.018386133511861167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,16384,0.027012266715367633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,16384,0.02597759962081909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,12288,0.014250666896502177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,768,0.005585066477457682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,512,0.005239466826121012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,256,0.0050687998533248905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,12288,0.02335466742515564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,10240,0.012524799505869547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,10240,0.02363626758257548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,8192,0.01090773344039917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1536,128,0.004906666775544485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,8192,0.021564799547195434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,8192,0.015797332922617594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,7168,0.011781332890192668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,7168,0.02072319984436035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,12288,0.021319466829299926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,6144,0.00969493289788564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,10240,0.01858133276303609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,4096,0.008468266328175862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,6144,0.021066667636235555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,6144,0.012692266702651977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,3584,0.008334933718045553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,5120,0.009546666344006857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,5120,0.019973333676656088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,3072,0.007817600170771282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,7168,0.014387200276056925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,4096,0.019025067488352455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,3584,0.01810773412386576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,3072,0.01763520042101542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,5120,0.011313066879908244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,2048,0.005671466886997223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,2560,0.006325333317120869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,2560,0.017288533846537273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,1536,0.004901333153247834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,4096,0.009959466258684794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,1536,0.015936000148455302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,2560,0.00796693315108617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,3584,0.009447466333707173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,2048,0.016722132762273155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,3072,0.008514133095741273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,1024,0.0042303999265035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,1024,0.015438933173815408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,768,0.0038880000511805216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,2048,0.0072405333320299785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,768,0.015618133544921874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,1536,0.006841599941253662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,512,0.0035605333745479585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,512,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,1024,0.005946666498978933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,256,0.0031786667803923286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,256,0.014776532848676046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,768,0.005468800167242686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,256,0.004953599969546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,128,0.0030303999781608583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,128,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,128,0.004740266501903534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,1024,512,0.005239466826121012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,64,0.0028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,64,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,1024,32,0.0028149334092934927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,1024,32,0.014678399761517844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,65536,0.04693973461786906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,65536,0.062286933263142906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,16384,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,65536,0.09878506660461425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,10240,0.021176532904307047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,16384,0.025308799743652344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,12288,0.011782399813334147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,12288,0.02249173323313395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,10240,0.011619200309117634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,16384,0.026809600989023845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,10240,0.01862506667772929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,12288,0.021210666497548422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,8192,0.010058666268984478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,8192,0.020483199755350748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,7168,0.009594666957855224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,7168,0.02113173405329386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,7168,0.013796266913414002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,6144,0.008936533331871032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,8192,0.015572266777356467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,6144,0.0203658660252889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,5120,0.00876800020535787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,5120,0.019642666975657145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,6144,0.012579199671745301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,5120,0.011228799819946289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,4096,0.007665066421031952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,4096,0.01857066750526428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,4096,0.009849599997202555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,3584,0.007899733384450276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,3584,0.018237866957982383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,3584,0.009311999877293904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,3072,0.006930133203665416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,3072,0.017927465836207072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,2560,0.006227200229962667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,2560,0.01744746764500936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,3072,0.008570667107899983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,2048,0.005522133409976959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,2048,0.01667413314183553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,2048,0.007091199855009715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,2560,0.008075733482837678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,1536,0.00487360010544459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,1536,0.01603626708189646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,1024,0.004177066683769226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,1536,0.006762666503588359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,1024,0.015544533729553223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,768,0.0037962667644023894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,1024,0.005861333509286245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,768,0.015152000387509666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,512,0.003453866640726725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,512,0.015313067038853965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,768,0.005566933254400889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,512,0.005125333368778229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,256,0.0031306666632493338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,256,0.014749866724014283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,256,0.004914133250713349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,128,0.002917333443959554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,128,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,768,128,0.004729599754015604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,64,0.0027722666660944624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,64,0.014603733023007711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,768,32,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,768,32,0.014569600423177084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,65536,0.03463146686553955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,65536,0.05555733442306519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,65536,0.09476799964904785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,16384,0.011923199892044068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,16384,0.023520000775655112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,12288,0.010067199667294819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,12288,0.022651733954747517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,12288,0.020872533321380615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,10240,0.009643733501434326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,10240,0.01812053322792053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,10240,0.02142080068588257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,8192,0.008363733688990276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,8192,0.02019946575164795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,8192,0.01476479967435201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,7168,0.007829333345095318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,7168,0.019937066237131755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,6144,0.009166933099428813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,6144,0.01926079988479614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,6144,0.012388267119725545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,5120,0.007851733267307282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,5120,0.019614932934443156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,4096,0.007089066505432129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,4096,0.018950400749842326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,3584,0.006740266581376393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,3584,0.017990400393803917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,3072,0.006504533191521962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,3072,0.01755519906679789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,2560,0.006308266520500183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,2560,0.01722559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,2560,0.00809386670589447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,2048,0.0055189331372578945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,2048,0.016694400707880655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,16384,0.026395734151204425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,2048,0.007201066613197327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,1536,0.004851200183232625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,1536,0.016265599926312765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,1536,0.006781866649786632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,1024,0.004188799858093261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,1024,0.0156960000594457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,768,0.003786666691303253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,768,0.015465600291887918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,7168,0.013758933544158936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,512,0.0034901333351929987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,512,0.01495039959748586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,512,0.005178666611512502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,256,0.0030752000709374744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,256,0.01479573349157969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,5120,0.011222400267918905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,128,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,128,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,128,0.004679466784000397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,4096,0.009941333532333374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,64,0.002845866729815801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,512,32,0.0030613332986831666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,64,0.014765866597493491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,3584,0.009416533509890239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,512,32,0.014691199858983359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,65536,0.023162666956583658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,65536,0.047643733024597165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,3072,0.008380800485610962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,16384,0.008531199892361958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,16384,0.021613866090774536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,12288,0.008430932958920796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,12288,0.02029119928677877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,10240,0.007402666906515758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,10240,0.02016426722208659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,10240,0.017298134167989095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,8192,0.006705066561698914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,8192,0.019130667050679527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,8192,0.014696533481280008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,7168,0.006388266881306966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,7168,0.01991893251736959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,7168,0.013470932841300964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,6144,0.0062389334042867025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,1024,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,6144,0.018811732530593872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,6144,0.012451199690500896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,5120,0.006583466629187266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,5120,0.019292799631754558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,5120,0.011160533626874287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,4096,0.006230400005976359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,4096,0.018071466684341432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,3584,0.006571733454863231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,768,0.005538133283456167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,3584,0.018131200472513834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,3584,0.009382399916648864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,3072,0.006211199859778086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,3072,0.017454934120178223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,3072,0.008411733309427898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,2560,0.006089599927266439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,512,256,0.004791466891765595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,2560,0.016899200280507405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,2560,0.007906133433183034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,2048,0.005449600021044413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,2048,0.016661333044370015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,2048,0.007085866729418437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,1536,0.00472320020198822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,1536,0.015889066457748412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,1536,0.0067071999112765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,1024,0.01555519998073578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,1024,0.0040522667268912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,1024,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,65536,0.090775465965271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,768,0.003751466671625773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,16384,0.024877866109212242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,768,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,512,0.0033674667278925574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,12288,0.019891200462977092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,512,0.014980266491572062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,512,0.005202133456865946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,256,0.003054933249950409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,256,0.014868266383806863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,256,0.004747733473777771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,128,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,128,0.014330666263898215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,64,0.0028938665986061097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,64,0.014367999633153281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,256,32,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,256,32,0.014361600081125895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,65536,0.01318933367729187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,65536,0.041238399346669515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,4096,0.010125866532325745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,16384,0.006932266553243001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,16384,0.02145706613858541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,12288,0.006333866715431213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,10240,0.00644053320089976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,12288,0.02001813252766927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,10240,0.02072426676750183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,10240,0.01735466718673706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,8192,0.006359466910362243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,8192,0.01900906761487325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,8192,0.014572800199190775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,7168,0.006265600025653839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,7168,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,6144,0.00609493354956309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,6144,0.01887893279393514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,768,0.005373866856098175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,5120,0.006455466647942861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,5120,0.018972800175348917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,5120,0.011096533139546711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,256,128,0.004603733122348785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,4096,0.006060799956321717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,4096,0.018244266510009766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,3584,0.006559999783833821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,3584,0.018026665846506754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,3584,0.00928533375263214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,65536,0.09177066485087076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,16384,0.025382399559020996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,3072,0.00624533345301946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,12288,0.019950934251149497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,3072,0.017624533176422118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,7168,0.01353600025177002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,2560,0.006022400160630544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,2560,0.016731733083724977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,2560,0.00786133309205373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,2048,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,2048,0.016463999946912132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,2048,0.007010133564472198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,6144,0.01234346628189087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,1536,0.00472320020198822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,1536,0.015801599621772765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,1536,0.006859733164310456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,1024,0.003990400085846583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,1024,0.015382400155067444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,768,0.0037205333511034647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,768,0.015524267156918844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,512,0.014857600132624308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,512,0.0033269333342711128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,512,0.0051466668645540874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,4096,0.009701333443323771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,256,0.003032533327738444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,256,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,256,0.004885333279768625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,128,0.0028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,128,0.014454399545987448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,128,0.004645333190759023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,64,0.0026752000053723653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,64,0.014453333616256715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,128,32,0.0026986666023731233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,3072,0.008272000153859456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,128,32,0.014475733041763306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,65536,0.011261866490046183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,65536,0.037836798032124835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,1024,0.0058335999647776285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,16384,0.006205866734186808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,16384,0.022299732764561972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,12288,0.006179200112819671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,128,128,768,0.005487999816735586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,12288,0.019656533002853395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,10240,0.006431999802589417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,10240,0.01978879968325297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,5120,0.0063701331615448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,8192,0.006359466910362243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,4096,0.005965866645177205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,8192,0.01887786587079366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,3584,0.006433066725730896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,7168,0.006217599908510844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,6144,0.006042666733264923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,7168,0.019853866100311278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,6144,0.0187008003393809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,5120,0.01904639999071757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,4096,0.018041600783665977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,3584,0.017822933197021485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,3072,0.006120533247788747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,3072,0.01748159925142924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,2560,0.006077866752942403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,2560,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,2048,0.005334400137265523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,2048,0.016318933169047038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,1536,0.004710400104522705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,1536,0.015735466281572977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,1024,0.0039381332695484165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,1024,0.015277866522471109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,128,0.0027744000156720476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,768,0.0037162666519482933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,768,0.015386666854222616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,512,0.003320533285538355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,512,0.015106133619944253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,256,0.0029866665601730345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,256,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,64,0.0026762666801611584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,128,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,64,0.014523733655611673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,64,32,0.0026933332284291584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,64,32,0.014387200276056925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,65536,0.00906773308912913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,65536,0.03713920116424561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,16384,0.0061706667145093284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,16384,0.020961066087086998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,12288,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,12288,0.020096000035603842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,10240,0.006235733131567637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,8192,0.006126933296521505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,10240,0.019621332486470543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,8192,0.018986666202545167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,7168,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,7168,0.019203199942906698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,6144,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,5120,0.006420266628265381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,6144,0.01906879941622416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,4096,0.00574186642964681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,5120,0.019686400890350342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,4096,0.018040533860524496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,3584,0.006226133306821187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,3584,0.0180074671904246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,3072,0.0058794667323430385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,3072,0.017310933272043864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,2560,0.006064000229040781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,1536,0.015719466408093772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,2560,0.01683733264605204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,2048,0.005317333340644837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,2048,0.016201600432395935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,1536,0.004759466648101807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,1024,0.004011733333269755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,768,0.0036576000352700555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,1024,0.015508266290028891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,256,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,512,0.003382399926582972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,768,0.015187199910481772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,512,0.015086932977040609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,256,0.003048533449570338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,128,0.002842666705449422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,64,0.002644266684850057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,128,0.014605866869290671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,64,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,128,32,32,0.002666666607062022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,128,32,32,0.014403200149536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,12288,0.30931625366210935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,16384,0.4037738800048828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,12288,0.282584540049235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,12288,0.5796735763549805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,10240,0.26000213623046875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,16384,0.7608554840087891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,10240,0.48348054885864256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,8192,0.38675947189331056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,8192,0.21400747299194336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,7168,0.21475626627604166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,7168,0.17076266606648763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,7168,0.33843307495117186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,6144,0.29391679763793943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,6144,0.16508906682332355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,6144,0.14821759859720868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,5120,0.24508907000223795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,5120,0.14056533177693684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,5120,0.13430933952331542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,4096,0.1990506649017334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,4096,0.11727786858876546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,4096,0.10783999760945637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,3584,0.17440959612528484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,3584,0.10522133509318035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,3584,0.09308693408966065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,3072,0.09311467011769613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,3072,0.1512191931406657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,3072,0.0816320021947225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,2560,0.12755839824676513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,2560,0.08089173634847005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,2560,0.0704416036605835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,2048,0.1035200039545695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,2048,0.06846720377604167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,2048,0.05810879866282145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,16384,0.37383572260538733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,1536,0.0800096035003662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,1536,0.056269868214925134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,1536,0.046828798453013104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,1024,0.05597759882609049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,1024,0.043866666158040364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,1024,0.03797653516133626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,10240,0.23723840713500977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,768,0.043747198581695554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,768,0.0376970648765564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,768,0.030909866094589233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,8192,0.20747733116149902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,512,0.032151466608047484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,512,0.030407466491063434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,256,0.020408533016840615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,256,0.023016534248987832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,256,0.017831466595331826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,128,0.01072746713956197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,64,0.008186666667461396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,128,0.021129600207010903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,64,0.019378133614857993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,65536,32,0.007226666808128357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,65536,32,0.019336533546447755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,16384,0.18745813369750977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,16384,0.11348053614298503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,16384,0.10196159680684407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,12288,0.14260800679524738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,65536,0.3986090660095215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,12288,0.09014933109283448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,12288,0.07812053362528483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,65536,0.7707253138224284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,10240,0.13972479502360027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,10240,0.07795626322428385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,8192,0.06635839939117431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,8192,0.05506879885991415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,8192,0.11294506390889485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,7168,0.08524160385131836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,7168,0.06003413200378418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,512,0.023612799247105916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,7168,0.04930026531219482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,6144,0.07427840232849121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,65536,128,0.01579093337059021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,6144,0.05414719978968302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,5120,0.06256639957427979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,5120,0.047884798049926756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,5120,0.037956265608469646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,4096,0.051793066660563145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,4096,0.04142186641693115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,4096,0.03187306722005208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,3584,0.04565759897232056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,65536,0.3749098777770996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,3584,0.03869973421096802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,3584,0.029157332579294842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,3072,0.03990506728490194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,3072,0.03561600049336751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,3072,0.02609386642773946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,2560,0.03915199836095174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,10240,0.06593706607818603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,2560,0.0320906658967336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,2560,0.02290346622467041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,1536,0.024906667073567708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,2048,0.028078933556874592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,6144,0.0435914675394694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,2048,0.028490666548411054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,2048,0.019668267170588175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,1536,0.022498132785161336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,1024,0.01602240006128947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,1024,0.021703465779622396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,1024,0.013732266426086426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,768,0.01222933332125346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,768,0.02063680092493693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,512,0.00958079993724823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,512,0.01835626761118571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,256,0.006478933493296306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,256,0.016085333625475564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,256,0.007371733089288075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,128,0.004265599946180979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,128,0.015542399883270264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,1536,0.01590293347835541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,64,0.0038634667793909705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,32,0.01595200002193451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,16384,64,0.015943466623624166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,16384,32,0.004308266441027323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,16384,0.15587306022644043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,16384,0.09197226365407309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,768,0.011613866686820984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,65536,0.3287775993347168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,65536,0.30788478851318357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,12288,0.12218026320139568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,65536,0.5714666366577148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,12288,0.07367146809895833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,512,0.00970240036646525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,10240,0.06338346799214681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,10240,0.10647253195444743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,8192,0.07531840006510417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,8192,0.04386026859283447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,8192,0.055194667975107824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,7168,0.06564480066299438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,7168,0.050273064772288004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,7168,0.03899413347244263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,6144,0.05789973338445028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,5120,0.048979198932647704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,6144,0.04541013240814209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,5120,0.04057066837946574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,5120,0.03007253408432007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,4096,0.03927146593729655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,4096,0.036562132835388186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,4096,0.02533973256746928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,3584,0.034916265805562334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,3584,0.03359359900156657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,3072,0.030892799297968548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,16384,128,0.006985599795977275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,3072,0.0308896005153656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,3072,0.020696532726287842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,2560,0.026394667228062947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,2560,0.027990400791168213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,2560,0.018126932779947917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,2048,0.021895466248194377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,16384,0.07910079956054687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,2048,0.025883734226226807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,12288,0.06996160348256429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,2048,0.015447466572125753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,1536,0.017571200927098594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,10240,0.05900906721750895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,1536,0.01290773351987203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,1536,0.022950400908788048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,6144,0.034408533573150636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,1024,0.012150399883588155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,1024,0.020217599471410115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,1024,0.010689066847165425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,768,0.01116480032602946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,3584,0.022957867383956908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,768,0.01914773384730021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,768,0.00947093367576599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,512,0.007957333326339721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,512,0.017369600137074788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,256,0.0044725333650906885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,256,0.015727999806404113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,256,0.006321066617965698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,128,0.0038933334251244865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,128,0.015233066678047181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,64,0.003618133316437403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,64,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,12288,32,0.003905066599448522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,12288,32,0.0157258669535319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,65536,0.49332691828409836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,16384,0.13725652694702148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,65536,0.28517119089762366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,16384,0.07942933241526286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,12288,0.10509333610534669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,12288,0.06413439909617105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,512,0.007582933207352956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,12288,0.054922668139139805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,10240,0.07839787006378174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,10240,0.055742931365966794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,8192,0.0627573331197103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,12288,128,0.005930666625499725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,8192,0.048205868403116865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,7168,0.054626135031382236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,7168,0.04427200158437093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,6144,0.04837973515192668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,6144,0.040701866149902344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,5120,0.04092586835225423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,5120,0.036685868104298906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,4096,0.033367466926574704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,4096,0.03219199975331624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,65536,0.26452906926472985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,4096,0.022201599677403767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,3584,0.03408853212992351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,3584,0.029848533868789672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,3072,0.02669653296470642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,16384,0.07787306308746338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,3072,0.02774080038070679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,2560,0.02288533250490824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,2560,0.025283199548721314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,10240,0.046510934829711914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,8192,0.0384554664293925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,2048,0.01904746691385905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,7168,0.03422293265660604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,2048,0.02304746707280477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,6144,0.030050132671991987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,1536,0.014829867084821067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,1536,0.02138239940007528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,1536,0.01156160036722819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,5120,0.026183466116587322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,1024,0.010776533683141073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,3584,0.020125865936279297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,1024,0.020252799987792967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,768,0.009130666653315227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,768,0.008180266618728638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,512,0.0073749333620071415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,3072,0.017948800325393678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,768,0.018168532848358156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,512,0.016291200121243795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,2560,0.015863466262817382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,256,0.004377600053946177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,256,0.015794133146603904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,128,0.003849600007136663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,128,0.015352533260981242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,2048,0.0135861337184906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,64,0.0035487999518712364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,32,0.01541973352432251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,10240,64,0.015589333573977151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,10240,32,0.003722666700681051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,16384,0.10423146883646647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,65536,0.2265237331390381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,65536,0.40079145431518554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,1024,0.009567999839782714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,16384,0.06699093182881674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,12288,0.08580053647359212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,12288,0.055010131994883214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,10240,0.061963733037312826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,10240,0.04853333234786987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,512,0.007097599903742473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,8192,0.05017600059509277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,8192,0.04155946572621663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,256,0.005992533266544342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,7168,0.0445248007774353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,7168,0.03890453179677327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,10240,128,0.00554666668176651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,6144,0.039338668187459305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,6144,0.03567893505096435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,5120,0.033316266536712644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,5120,0.03203199903170268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,65536,0.22922666867574057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,16384,0.0628554662068685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,4096,0.03120959997177124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,4096,0.02808000048001607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,12288,0.047541332244873044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,3584,0.02450986703236898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,10240,0.04085119962692261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,3584,0.026504532496134443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,3072,0.021615999937057494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,8192,0.03314239978790283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,3072,0.02476373314857483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,7168,0.029687466224034627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,2560,0.018557866414388023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,6144,0.027100799481074016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,2560,0.02297919988632202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,2048,0.015260799725850423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,2048,0.021375999848047892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,1536,0.011912533640861511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,5120,0.023104000091552734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,1536,0.01998400092124939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,1024,0.009063466389973959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,1024,0.01814613342285156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,1024,0.00905386706193288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,768,0.007646933197975159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,768,0.01646080017089844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,4096,0.01950826644897461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,512,0.0057322666049003605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,512,0.01625920037428538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,256,0.00421973317861557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,3584,0.018038400014241538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,256,0.015400532881418863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,128,0.003676799933115641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,128,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,3072,0.016219733158747356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,64,0.003370666752258936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,64,0.015502933661142984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,2560,0.014460800091425577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,8192,32,0.0037087999284267426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,2048,0.012495999534924824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,8192,32,0.015591466426849365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,1536,0.010863999525705974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,65536,0.352510929107666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,65536,0.21475520133972167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,16384,0.09205546379089355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,768,0.007781333227952321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,512,0.00694400022427241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,256,0.006234666705131531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,16384,0.0678879976272583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,12288,0.06498133341471354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,12288,0.05066453218460083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,8192,128,0.005758933226267497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,10240,0.05578453143437704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,10240,0.04445120096206665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,8192,0.044487468401590985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,8192,0.03930026690165202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,7168,0.039501865704854325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,65536,0.22657279968261718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,7168,0.03593386809031169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,16384,0.06066453456878662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,12288,0.047364266713460286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,6144,0.03491520086924235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,5120,0.02983679970105489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,6144,0.03294720053672791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,10240,0.040915199120839435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,5120,0.029805866877237956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,4096,0.024795732895533242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,8192,0.032818132638931276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,4096,0.026712532838185626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,3584,0.021817600727081297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,3584,0.025088000297546386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,3072,0.019427200158437095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,3072,0.023485867182413737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,7168,0.02929813265800476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,3072,0.015588266650835672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,2560,0.016666666666666666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,2560,0.022066134214401244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,2048,0.013690666357676188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,6144,0.0256223996480306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,2048,0.020886399348576865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,1536,0.010991999506950378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,5120,0.022382932901382446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,1536,0.01960960030555725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,4096,0.0189301331837972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,1024,0.008469333251317341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,1024,0.01729493339856466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,3584,0.017065600554148356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,768,0.0071712002158164975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,768,0.01618666648864746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,512,0.004763733347256978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,2560,0.013954133788744608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,512,0.015829333662986757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,512,0.006525866687297821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,256,0.003953066716591517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,2048,0.011779200037320454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,256,0.01555519998073578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,128,0.0034506666163603462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,128,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,64,0.003509333233038584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,1536,0.010191999872525533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,64,0.015256533026695251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,7168,32,0.003575466573238373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,7168,32,0.01537493367989858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,1024,0.008400000135103862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,65536,0.29509973526000977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,16384,0.08378346761067709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,65536,0.223687473932902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,65536,0.18621439933776857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,768,0.0071829333901405334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,16384,0.058645331859588624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,12288,0.06437546809514363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,12288,0.046561066309611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,10240,0.054789332548777256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,10240,0.0416159987449646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,8192,0.03859519958496094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,8192,0.03577386538187663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,7168,0.03457066615422567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,256,0.005682133138179779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,7168,0.03302719990412394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,6144,0.030817067623138426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,6144,0.030859732627868654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,7168,128,0.005262933174769084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,5120,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,5120,0.0276202658812205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,4096,0.021861332654953002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,16384,0.05720959901809693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,4096,0.02515946626663208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,3584,0.019475199778874717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,12288,0.04453333218892415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,3584,0.023414399226506552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,3072,0.017197867234547935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,10240,0.03875733216603597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,3072,0.02239146629969279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,8192,0.03078293402989705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,2560,0.01453013320763906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,7168,0.027831466992696126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,2560,0.021607466538747153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,6144,0.024621866146723428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,2048,0.012337066729863485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,2048,0.020485333601633706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,1536,0.010038399696350097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,1536,0.019066667556762694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,1536,0.009731200337409974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,1024,0.007849599917729695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,1024,0.016301866372426352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,768,0.006760533154010773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,5120,0.02134933272997538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,768,0.016056533654530844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,512,0.004389333228270212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,512,0.01601920028527578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,4096,0.017692800362904867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,256,0.0037717332442601522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,256,0.01516800026098887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,3584,0.01628159979979197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,128,0.003357866654793421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,128,0.01508799990018209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,3072,0.014677332838376364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,64,0.003307733436425527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,2560,0.013185066978136697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,64,0.015097600221633912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,2048,0.011331199606259664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,6144,32,0.0033728001018365227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,6144,32,0.015155200163523355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,65536,0.1577888011932373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,65536,0.24266986846923827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,1024,0.007670400043328603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,65536,0.21368853251139322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,16384,0.06994453271230062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,16384,0.05636373360951742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,12288,0.055245868364969884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,12288,0.04220586617787679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,768,0.0069013332327206925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,512,0.006119466821352641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,10240,0.047926398118336995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,10240,0.03765759865442912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,256,0.005459199845790863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,8192,0.04054400126139323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,8192,0.03289706707000732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,6144,128,0.005054933329423269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,7168,0.029819732904434203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,7168,0.030324266354242964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,6144,0.02650986711184184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,6144,0.028139734268188478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,5120,0.022563199202219643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,5120,0.025512532393137617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,4096,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,4096,0.023422932624816893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,16384,0.05479253530502319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,12288,0.04282666842142741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,3584,0.016752000649770102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,3584,0.022348799308141074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,3072,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,3072,0.02168853282928467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,10240,0.03682880004247029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,8192,0.02998720010121663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,2560,0.012492799758911132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,2560,0.020542933543523153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,7168,0.026848000288009644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,6144,0.02323306600252787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,2048,0.010690133770306904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,2048,0.019679999351501463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,5120,0.02007466753323873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,4096,0.017144532998402913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,1536,0.008805333574612936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,1536,0.017835734287897746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,1024,0.006997333467006683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,3584,0.015757866700490317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,1024,0.01579093337059021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,768,0.005677866439024607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,3072,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,768,0.015732266505559287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,512,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,512,0.015390933553377787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,256,0.003667200108369192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,256,0.015074132879575094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,2560,0.013051733374595642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,128,0.003316266586383184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,128,0.015067733327547708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,2048,0.010956799983978272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,64,0.0031690667072931922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,64,0.015074132879575094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,5120,32,0.003252266595760981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,5120,32,0.014903466900189719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,1536,0.009642666578292847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,65536,0.1979039986928304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,65536,0.13589332898457845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,1024,0.007483733197053273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,16384,0.05450559854507446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,65536,0.18404159545898438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,768,0.006852266689141591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,16384,0.04679786761601766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,512,0.00613973339398702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,12288,0.043635201454162595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,12288,0.03824533224105835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,12288,0.04034133354822795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,10240,0.03760960102081299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,10240,0.033378132184346515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,8192,0.031506133079528806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,256,0.005474133292833964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,8192,0.02924906611442566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,5120,128,0.00514026681582133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,7168,0.02864426573117574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,7168,0.02704426646232605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,6144,0.02572159965833028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,6144,0.02542720039685567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,5120,0.01885333259900411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,5120,0.023782400290171306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,16384,0.046882132689158126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,4096,0.015321600437164306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,4096,0.022182399034500123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,10240,0.03224959969520569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,3584,0.013517866532007853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,3584,0.021241599321365358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,3072,0.012050132950146992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,3072,0.020533333222071327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,3072,0.013455999890963235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,8192,0.026451200246810913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,2560,0.010699733098347982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,2560,0.01952000061670939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,2048,0.009251200159390767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,2048,0.01854613423347473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,1536,0.007877333462238312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,1536,0.016215466459592185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,7168,0.02380266586939494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,1024,0.0062720000743865965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,1024,0.016235733032226564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,6144,0.021014400323232017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,768,0.004349866509437561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,5120,0.01839039921760559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,768,0.016083199779192606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,512,0.003819733361403147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,512,0.015564800302187601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,4096,0.015893333156903586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,256,0.0034506666163603462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,256,0.014940800269444785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,3584,0.014460800091425577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,128,0.0031498665610949195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,128,0.014762666821479798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,2560,0.01172160009543101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,64,0.0030464000999927522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,2048,0.010417067011197408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,64,0.01511146624883016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,4096,32,0.003433600068092346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,4096,32,0.01518186628818512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,1536,0.00864533285299937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,65536,0.181222407023112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,65536,0.12855359713236492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,16384,0.05409493446350098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,16384,0.04448426564534505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,12288,0.038983468214670816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,12288,0.036909866333007815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,1024,0.007147733370463054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,10240,0.03423146804173787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,768,0.006751999755700429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,10240,0.031268266836802165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,10240,0.030763733386993408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,8192,0.028700800736745198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,512,0.006072533130645752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,8192,0.027460267146428425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,7168,0.02358293334643046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,7168,0.025671466191609697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,6144,0.020866133769353232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,6144,0.024245333671569825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,256,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,4096,128,0.00516480008761088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,5120,0.01669013301531474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,5120,0.02253119945526123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,65536,0.16653226216634115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,16384,0.04598933458328247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,4096,0.014627200365066529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,12288,0.03635840018590291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,4096,0.02142933408419291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,3584,0.012301866213480632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,3584,0.020539732774098714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,3584,0.014110933740933737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,3072,0.011117866635322571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,3072,0.019874133666356406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,8192,0.026554665962855023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,2560,0.009830400347709656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,7168,0.02329813241958618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,2560,0.01927786668141683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,2048,0.008568533261617025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,2048,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,2048,0.010100266337394715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,6144,0.02044586737950643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,1536,0.007316266496976216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,1536,0.016196266810099284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,1024,0.0053151999910672505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,1024,0.015874133507410685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,5120,0.01789120038350423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,768,0.004268800218900045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,768,0.015305599570274353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,512,0.003772799919048945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,512,0.015250133474667868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,4096,0.015267200271288552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,256,0.0033632000287373864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,256,0.015056000153223673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,3072,0.012934399644533792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,128,0.0032479998966058097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,128,0.014854400356610616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,64,0.003020799905061722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,64,0.014824533462524414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,2560,0.011658666531244914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3584,32,0.003142400085926056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3584,32,0.014798933267593383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,65536,0.15702932675679523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,65536,0.1139957348505656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,16384,0.04640959898630778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,1536,0.008309333523114523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,16384,0.040625067551930745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,1024,0.006962133447329204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,12288,0.03461653391520182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,12288,0.034619732697804766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,768,0.006551466882228851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,10240,0.029756800333658857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,10240,0.030006400744120282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,512,0.005884799857934316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,8192,0.02523946762084961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,8192,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,256,0.005371733506520589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,7168,0.02189013361930847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,7168,0.02458560069402059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3584,128,0.005123200019200643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,6144,0.01943146586418152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,65536,0.18695467313130695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,6144,0.02354453404744466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,16384,0.048956799507141116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,5120,0.01744426687558492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,5120,0.022536534070968627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,4096,0.013446399569511413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,4096,0.020891733964284263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,3584,0.011426132917404175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,3584,0.020333866278330483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,12288,0.037562668323516846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,3072,0.0102101335922877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,3072,0.019474132855733236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,10240,0.033317333459854125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,2560,0.009124267101287841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,8192,0.026718932390213012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,2560,0.01830613414446513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,2048,0.007969066500663757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,2048,0.016909867525100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,2048,0.010140800476074218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,7168,0.024318933486938477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,1536,0.0067445332805315655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,1536,0.016452266772588094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,1024,0.004791466891765595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,1024,0.01586560010910034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,6144,0.02115946610768636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,768,0.004211199780305227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,5120,0.0185589333375295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,768,0.01560533344745636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,512,0.0037546666959921518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,4096,0.015506133437156677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,512,0.015170133113861084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,256,0.0033941333492596946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,256,0.014776532848676046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,3584,0.014427733421325684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,256,0.005251200000445048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,128,0.0031829332311948144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,128,0.014589866995811463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,64,0.0029493334392706556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,3072,0.013124266266822815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,64,0.014734933773676554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,3072,32,0.0031167998909950255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,2560,0.011802666385968526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,3072,32,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,65536,0.13637545903523762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,65536,0.10055147012074787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,16384,0.03674559990564982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,65536,0.16807360649108888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,16384,0.036756265163421634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,1536,0.008474666873613994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,1024,0.0073088000218073535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,12288,0.029589333136876422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,12288,0.032128000259399415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,768,0.006592000027497609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,10240,0.02573866645495097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,512,0.005818666517734527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,10240,0.028568534056345622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,8192,0.02137920061747233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,3072,128,0.004949333270390829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,8192,0.02646506627400716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,7168,0.01950613260269165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,7168,0.02446933388710022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,6144,0.01733760039011637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,6144,0.023462400833765665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,5120,0.013193600376447043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,5120,0.0210591991742452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,16384,0.044609065850575766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,4096,0.012412800391515096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,12288,0.035709865887959796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,4096,0.020371200640996297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,3584,0.011351466178894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,3584,0.019348265727361043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,3072,0.010491733749707539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,3072,0.019323732455571493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,10240,0.030692267417907714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,2560,0.008524800340334576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,2560,0.017564799388249716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,8192,0.025973333915074663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,7168,0.022132267554601036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,6144,0.019789866606394448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,2048,0.007468800246715546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,2048,0.017018665870030723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,2048,0.00913706620534261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,1536,0.006358399987220764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,3584,0.013372799754142762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,5120,0.0178272008895874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,1536,0.01662720044453939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,4096,0.01443839967250824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,1024,0.004453333218892416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,2560,0.010769066214561463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,1024,0.015943466623624166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,3072,0.011851732929547627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,768,0.004206933577855428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,768,0.016103466351826988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,768,0.006353066861629486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,512,0.0037994667887687682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,512,0.015505066514015198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,256,0.0033717334270477297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,256,0.014859732985496522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,1536,0.00795839975277583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,64,0.002903466671705246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,128,0.0031648000081380212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,1024,0.006758399804433187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,128,0.014750933647155762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,64,0.014959999918937683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,512,0.005831466615200042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2560,32,0.003036800026893616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2560,32,0.014917332927385965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,256,0.005357866485913595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,65536,0.10675626595815021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,65536,0.08497493267059326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,16384,0.03074986735979716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,16384,0.03277759949366252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,12288,0.02499306599299113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2560,128,0.005031466484069824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,12288,0.028250666459401448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,10240,0.021574399868647256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,65536,0.1648853302001953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,10240,0.025836799542109174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,16384,0.04370559851328532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,8192,0.017656532923380534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,10240,0.029383466641108198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,8192,0.02399253249168396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,12288,0.03333013256390889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,7168,0.016149333119392394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,5120,0.012430933117866517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,7168,0.022679466009140014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,6144,0.014300800363222756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,6144,0.021766400337219237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,8192,0.023970133066177367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,7168,0.021800533930460612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,5120,0.020999467372894286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,4096,0.010730666915575664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,4096,0.014472533265749613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,4096,0.0200437327226003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,6144,0.01920213301976522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,5120,0.016341333587964378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,3584,0.009648000200589497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,3584,0.0194048007329305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,3584,0.01323199967543284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,3072,0.00904960036277771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,3072,0.018169599771499633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,2560,0.008210133512814839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,2560,0.017280000448226928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,3072,0.011858133474985759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,2048,0.007356800138950348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,2048,0.016790399948755898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,2560,0.010431999961535137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,1536,0.005734399954477946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,1536,0.01602240006128947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,1024,0.004598399996757508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,2048,0.009405866265296936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,1024,0.015701333681742348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,1536,0.00808426688114802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,768,0.004099199920892716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,768,0.015945600469907124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,1024,0.006822399795055389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,512,0.0037119999527931214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,512,0.014983466267585755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,768,0.006382933259010315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,256,0.003291733314593633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,256,0.014716800053914389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,256,0.005165866514046987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,128,0.0030261332790056865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,512,0.005799466868241628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,128,0.014593066771825156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,64,0.002924799919128418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,64,0.01476479967435201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,2048,128,0.004985600213209788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,2048,32,0.0030933332939942675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,2048,32,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,65536,0.0834656000137329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,65536,0.07597333590189616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,16384,0.025059199333190917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,10240,0.017498666048049928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,65536,0.13768107096354168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,16384,0.02906559904416402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,12288,0.019826134045918785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,12288,0.0265066663424174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,16384,0.037937064965565995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,10240,0.025049599011739095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,8192,0.021810134251912437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,10240,0.025483733415603636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,12288,0.029865600665410358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,7168,0.019368533293406168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,6144,0.021463465690612794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,8192,0.014248533050219217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,8192,0.022920533021291097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,7168,0.013964800039927163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,7168,0.02211839954058329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,6144,0.012481066584587096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,6144,0.017145599921544394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,5120,0.011546666423479717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,5120,0.02066453297932943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,4096,0.009594666957855224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,4096,0.018772266308466592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,5120,0.014658133188883463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,3584,0.008935466408729553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,3584,0.018230400482813516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,4096,0.012461866935094197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,3584,0.011495467027028401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,3072,0.00823466678460439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,3072,0.01739306648572286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,2560,0.007259733478228251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,2560,0.01687893271446228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,2048,0.006292266647020976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,3072,0.010442666212717692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,2048,0.01664959987004598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,2560,0.009617066383361817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,1536,0.005208533505598704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,1536,0.016406400005022685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,2048,0.008254933357238769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,1024,0.004496000210444133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,1536,0.007503999769687653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,1024,0.015639467040697734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,768,0.0042357335488001505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,768,0.015528532862663268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,768,0.005924266576766968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,512,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,1024,0.0063381334145863845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,512,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,256,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,256,0.01480959951877594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,512,0.005419733126958212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,128,0.003050666550795237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,128,0.014876799782117209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,256,0.005133866767088572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,64,0.0028575999041398365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1536,128,0.004875733455022176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,64,0.014763733744621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1536,32,0.0030037333567937215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1536,32,0.01461120049158732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,65536,0.05633386770884195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,65536,0.06111679871877035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,16384,0.018595200777053834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,65536,0.1210250695546468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,16384,0.02555946707725525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,16384,0.03212479948997497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,12288,0.014441600441932679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,12288,0.023821866512298583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,10240,0.013554132978121438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,10240,0.022754132747650146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,12288,0.02483946681022644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,8192,0.012162133057912191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,8192,0.02115946610768636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,10240,0.02177600065867106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,7168,0.011725866794586181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,8192,0.018224000930786133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,7168,0.021450666586558025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,6144,0.010567466417948406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,6144,0.02103360096613566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,7168,0.016546133160591125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,5120,0.009827199578285217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,6144,0.014012799660364787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,5120,0.019428267081578573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,5120,0.012486400206883748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,4096,0.00844586690266927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,4096,0.018104533354441323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,4096,0.010085333387056987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,3584,0.008605866630872091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,3584,0.017917867501576742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,3584,0.009559466441472372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,3072,0.007739733159542084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,3072,0.017343999942143758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,3072,0.008642133076985676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,2560,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,2560,0.017100799083709716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,2048,0.005593599875768026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,1024,0.004163199911514918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,2560,0.008381866415341695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,2048,0.01676586667696635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,2048,0.0073749333620071415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,1536,0.004910933474699656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,1536,0.015799466768900552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,1024,0.015542399883270264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,1024,0.005948799848556519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,1536,0.006941866874694824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,768,0.0038773333032925926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,768,0.015381333231925965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,512,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,768,0.0056202664971351625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,512,0.0150218665599823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,256,0.0031850665807724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,512,0.00528106689453125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,256,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,128,0.0029205332199732465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,128,0.014633599917093912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,256,0.005027199784914652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,64,0.002810666710138321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,1024,128,0.004853333532810211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,64,0.014677332838376364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,1024,32,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,1024,32,0.014757333199183145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,12288,0.012217600146929424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,65536,0.04556373357772827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,65536,0.05615253448486328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,65536,0.10740479628245037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,16384,0.015733333428700765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,16384,0.02500480016072591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,16384,0.026502400636672974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,12288,0.022702932357788086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,10240,0.011173333724339802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,10240,0.0214954674243927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,12288,0.022810665766398112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,8192,0.010923733313878376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,8192,0.020451200008392335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,10240,0.020169599850972494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,7168,0.010384000341097514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,5120,0.00881706674893697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,8192,0.015131733814875283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,7168,0.01983893314997355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,7168,0.014056533575057983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,6144,0.009609599908192951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,6144,0.018730666240056357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,5120,0.019463467597961425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,6144,0.012501333157221475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,5120,0.010961066683133442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,4096,0.00765119989713033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,4096,0.018437333901723228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,3584,0.006977066894372304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,3584,0.01745706597963969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,4096,0.009648000200589497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,3584,0.00942080020904541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,3072,0.006618666648864746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,3072,0.017427200078964235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,3072,0.008364799618721008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,1536,0.004850133260091146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,2560,0.006202666461467743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,2560,0.01676373283068339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,2560,0.007995733122030894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,2048,0.005533866584300995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,1024,0.005835733314355215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,2048,0.016489600141843162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,2048,0.007214933137098948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,1536,0.015953066945075988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,1024,0.004208000004291534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,1536,0.00678719977537791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,1024,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,512,0.0052821333209673565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,768,0.0038474666575590765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,768,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,768,0.005611733098824819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,512,0.003487999985615412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,128,0.01461013356844584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,512,0.015204266707102457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,256,0.0032170665760835014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,256,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,256,0.0049002667268117275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,128,0.0029056000212828318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,65536,0.04937386512756348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,768,128,0.0046741331617037455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,64,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,64,0.014617600043614707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,768,32,0.002834133307139079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,12288,0.010845866799354554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,16384,0.025997867186864216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,768,32,0.014443733294804893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,65536,0.033817601203918454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,65536,0.09619839986165366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,16384,0.01242026686668396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,16384,0.022352000077565513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,12288,0.020895999670028687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,12288,0.020518400271733604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,10240,0.010193066795667012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,10240,0.019825067122777304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,8192,0.009212799866994222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,10240,0.01716586748758952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,8192,0.019307732582092285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,7168,0.008449066678682964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,8192,0.014640000462532044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,7168,0.019766400257746376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,7168,0.013478400309880576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,6144,0.007567999760309856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,6144,0.01918826699256897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,5120,0.00691840002934138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,6144,0.012228266398111979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,5120,0.018835200866063436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,5120,0.01074773371219635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,4096,0.006427733103434245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,4096,0.018338133891423546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,3584,0.006763733426729838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,3584,0.017670400937398276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,3584,0.009191466371218364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,3072,0.006379733482996623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,4096,0.00959999958674113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,3072,0.017197867234547935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,3072,0.008317866424719492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,2560,0.006318933268388112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,2560,0.01689066688219706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,2560,0.007921066880226136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,2048,0.005549866457780202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,2048,0.016309332847595216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,2048,0.0070271998643875126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,1536,0.004833066463470459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,1536,0.016130133469899496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,1024,0.004324266811211904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,1024,0.015546666582425437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,768,0.003804799914360046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,1536,0.006636799871921539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,768,0.01578133304913839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,768,0.0053845331072807315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,512,0.003487999985615412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,128,0.00290133332212766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,1024,0.0058229332168896995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,128,0.014628266294797262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,512,0.015037866433461508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,256,0.0030410667260487873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,256,0.014800000190734863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,256,0.004752000172932943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,128,0.004648533463478088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,64,0.0028181334336598715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,64,0.014601600170135499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,512,32,0.0028042666614055633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,512,32,0.014526933431625366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,65536,0.021985065937042237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,65536,0.03995093504587809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,65536,0.08610773086547852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,16384,0.009129599730173747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,16384,0.02035413384437561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,16384,0.024338134129842124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,512,512,0.005171200136343638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,12288,0.008035199840863545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,12288,0.02007253368695577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,12288,0.01940586765607198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,10240,0.007250133156776428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,10240,0.02031893332799276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,8192,0.0066890666882197065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,8192,0.018732800086339315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,7168,0.006502399841944377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,10240,0.01681813398996989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,7168,0.0192522664864858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,7168,0.013218133648236593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,8192,0.014600533246994018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,6144,0.00618453323841095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,6144,0.019061332941055296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,5120,0.006602666775385539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,5120,0.018879999717076622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,5120,0.010618666807810467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,4096,0.006138666470845541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,4096,0.018068265914916993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,3584,0.006632533172766368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,3584,0.017799466848373413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,3072,0.00631466656923294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,3072,0.01694399913152059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,3072,0.008332799871762593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,2560,0.006055466830730438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,2560,0.016403200229008992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,2048,0.0053375999132792154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,6144,0.011997866630554199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,2048,0.016425599654515587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,1536,0.004695466657479604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,1536,0.015599999825159708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,1536,0.00664213349421819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,1024,0.0040448000033696495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,1024,0.01588266690572103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,768,0.0037418665985266366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,768,0.015717333555221556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,768,0.0053951998551686605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,512,0.0033589333295822145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,512,0.014964266618092855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,4096,0.009613866607348125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,3584,0.009114666779836019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,256,0.003049599876006444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,128,0.0029130667448043824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,2560,0.007949866851170858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,256,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,2048,0.0070709332823753355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,1024,0.0057770664493242896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,128,0.014528000354766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,128,0.004676266511281332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,64,0.0027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,64,0.014538666605949402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,256,32,0.0026709333062171934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,256,32,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,65536,0.012257066369056702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,65536,0.03370240132013957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,65536,0.08759786287943522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,16384,0.007045333087444305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,512,0.0050805335243542995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,16384,0.02042986750602722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,16384,0.02440746625264486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,256,256,0.004739200075467428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,12288,0.0063178668419520065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,12288,0.019556266069412232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,12288,0.01949653426806132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,10240,0.006563200056552887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,7168,0.0062730665008227035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,10240,0.019720532496770225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,10240,0.017025067408879598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,8192,0.006472533444563548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,8192,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,8192,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,7168,0.019163733720779418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,7168,0.013314132889111837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,6144,0.0060810665289560955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,6144,0.018467199802398682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,6144,0.011946666240692138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,5120,0.006482133269309997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,3584,0.01750719944636027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,5120,0.018948266903559365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,5120,0.01066986620426178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,3072,0.0172650674978892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,4096,0.00600853314002355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,4096,0.018169599771499633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,4096,0.00942186713218689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,3584,0.006485333542029063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,3584,0.009106133381525676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,3072,0.006175999840100607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,3072,0.008140799899895985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,2560,0.0060703997810681665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,2560,0.01643519997596741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,1536,0.01649066706498464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,2048,0.005399466554323832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,1024,0.003985066711902618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,2048,0.016082132856051125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,1536,0.004747733473777771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,1024,0.015415466825167336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,1024,0.005819733440876007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,768,0.00360000009338061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,768,0.015434666474660238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,2560,0.007814399898052216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,768,0.005465599894523621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,512,0.0033941333492596946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,512,0.015041066209475198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,128,0.0028031999866167706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,512,0.005037866532802582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,256,0.0029813334345817565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,256,0.014779733618100485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,2048,0.007044266661008198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,128,0.014389333128929139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,64,0.002726399898529053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,64,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,128,32,0.0027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,128,32,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,65536,0.009299199779828389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,1536,0.006807466844717662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,65536,0.03179200092951457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,16384,0.006258133550484974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,10240,0.0198634664217631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,16384,0.020165334145228066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,12288,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,12288,0.019411200284957887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,10240,0.006364800035953522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,8192,0.00625600020090739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,256,0.004785066843032837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,8192,0.018576000134150186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,96,128,128,0.004644266764322917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,7168,0.006229333579540253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,7168,0.019316265980402626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,5120,0.006409599880377452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,5120,0.01865066687266032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,6144,0.01873706579208374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,6144,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,4096,0.006077866752942403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,4096,0.01803306738535563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,3584,0.006405333181222279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,3584,0.017417599757512413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,3072,0.006040533383687338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,3072,0.017026132345199584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,2560,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,1024,0.003984000037113826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,2560,0.01652906636397044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,2048,0.005346133311589559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,2048,0.016684800386428833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,1536,0.004715733230113983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,1536,0.01574720044930776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,768,0.0036650667587916053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,1024,0.015427199999491372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,768,0.015058133006095886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,512,0.0033674667278925574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,256,0.0030080000559488933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,512,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,256,0.014453333616256715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,65536,0.009522133072217305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,128,0.0028149334092934927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,128,0.01439466675122579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,64,0.0027093333502610523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,64,0.014528000354766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,64,32,0.0027221334477265675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,64,32,0.014167466759681701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,65536,0.03196693261464437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,16384,0.0061941335598627726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,12288,0.006066133578618368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,16384,0.020241065820058187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,12288,0.019834667444229126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,10240,0.0063391998410224915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,10240,0.019792000452677407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,8192,0.006131199995676676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,8192,0.0184608002503713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,7168,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,7168,0.018899200359980266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,6144,0.005932799975077311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,6144,0.01872533361117045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,5120,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,5120,0.01911999980608622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,4096,0.005916800101598104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,4096,0.018389334281285606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,3584,0.006178133189678192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,3584,0.01762666702270508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,3072,0.005838933090368906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,3072,0.01734506686528524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,2560,0.006105599800745646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,2560,0.01652906636397044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,2048,0.005390933156013489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,2048,0.015920000274976094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,1536,0.004658133288224538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,1536,0.01562346617380778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,1024,0.0040501333773136135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,1024,0.015450666348139444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,768,0.0037098666032155357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,768,0.01516800026098887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,512,0.003328000009059906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,512,0.014947199821472168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,32,0.002619733413060506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,256,0.003028266628583272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,256,0.014870400230089823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,128,0.0028021333118279776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,128,0.014434132973353067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,96,32,64,0.002647466709216436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,64,0.014514133334159851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,96,32,32,0.01439573367436727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,16384,0.41022294362386064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,16384,0.37457386652628577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,16384,0.774561055501302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,12288,0.3137951850891113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,12288,0.5915264129638672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,10240,0.2656991958618164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,12288,0.2888277371724447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,10240,0.4938357353210449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,10240,0.24548800786336264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,8192,0.39401280085245766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,8192,0.24387839635213218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,8192,0.19551572799682618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,7168,0.1721354643503825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,7168,0.19359893798828126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,7168,0.34565760294596354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,6144,0.16814613342285156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,6144,0.2986975987752279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,6144,0.14821972846984863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,5120,0.25041813850402833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,5120,0.1438485304514567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,5120,0.12659520308176678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,4096,0.20578452746073403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,4096,0.1338666598002116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,4096,0.10259413719177246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,3584,0.18350079854329426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,3584,0.10748373667399089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,3584,0.10282453695933025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,3072,0.1547829310099284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,3072,0.09529386361440023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,3072,0.0808618704477946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,2560,0.13062506516774494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,2560,0.08237333297729492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,2560,0.06996800104777018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,2048,0.10450773239135742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,2048,0.06985066731770834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,1536,0.08218986988067627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,1536,0.05821333328882853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,1536,0.046187734603881835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,768,0.03939733505249023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,1024,0.05704106489817301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,1024,0.04518186648686727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,1024,0.03418879906336467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,768,0.045881601174672444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,768,0.030461867650349934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,512,0.03377386728922526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,512,0.031023999055226643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,2048,0.05762453476587931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,256,0.01908479928970337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,256,0.023431466023127238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,256,0.01752106746037801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,128,0.011225600043932598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,128,0.02068159977595011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,128,0.015305599570274353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,64,0.008772266904513042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,64,0.01898026665051778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,65536,32,0.007353599866231282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,65536,32,0.019341866175333657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,65536,512,0.022957867383956908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,65536,0.3981781323750814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,65536,0.7618250528971354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,16384,0.1901408036549886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,16384,0.11318613688151043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,16384,0.10107946395874023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,10240,0.06680640379587809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,65536,0.3778463999430338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,12288,0.1443658669789632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,12288,0.0902997334798177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,10240,0.14068053563435873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,10240,0.07767893473307291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,8192,0.09760746955871583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,8192,0.06610453526178996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,8192,0.06212906837463379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,7168,0.08591573238372803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,7168,0.05962026516596476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,6144,0.07515412966410319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,6144,0.05345600048700968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,5120,0.06262186765670777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,12288,0.08912959893544516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,5120,0.047882668177286786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,5120,0.03736319939295451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,4096,0.051643733183542886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,4096,0.041211732228597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,7168,0.05221226612726847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,4096,0.031549866994222006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,3584,0.045315198103586835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,6144,0.043169065316518145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,3584,0.039018666744232176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,3584,0.028643200794855755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,3072,0.03985280195871989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,3072,0.034908799330393474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,3072,0.025867732365926106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,2560,0.03391040166219075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,2560,0.03182506759961446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,1536,0.016051200032234193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,2560,0.022514132658640544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,2048,0.027914667129516603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,2048,0.02829866607983907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,1536,0.02220906615257263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,1536,0.024315732717514037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,1024,0.015564800302187601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,1024,0.021348265806833903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,768,0.01209493378798167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,768,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,512,0.00914026697476705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,768,0.012019200126330058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,512,0.01823893388112386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,128,0.004092800120512644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,512,0.009708799918492635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,256,0.006358399987220764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,256,0.01597866714000702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,2048,0.019593600432078043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,128,0.015508266290028891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,64,0.0037664001186688742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,64,0.016062933206558227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,16384,32,0.004106666644414266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,1024,0.012585600217183432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,16384,32,0.01581013302008311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,16384,0.15429120063781737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,65536,0.571178690592448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,65536,0.3201354662577311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,65536,0.313592529296875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,16384,0.09502399762471517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,16384,0.08435200055440267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,12288,0.12021866639455159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,256,0.00738560010989507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,12288,0.08399466673533121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,12288,0.0678656021753947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,10240,0.10202240149180095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,10240,0.06535786787668864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,16384,128,0.006782933572928111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,10240,0.05502400000890097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,8192,0.0794549306233724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,8192,0.05619946718215942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,7168,0.06932160059611002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,7168,0.05120746692021688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,6144,0.06066346565882365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,6144,0.04597440163294474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,5120,0.05099946657816569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,5120,0.04075413147608439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,4096,0.04172159830729167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,4096,0.03585280179977417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,8192,0.04492586851119995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,3584,0.0369322657585144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,3584,0.03333439826965332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,7168,0.03943253358205159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,6144,0.03498026529947917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,3072,0.032255999247233075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,3072,0.030931200583775836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,2560,0.027480532725652058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,2560,0.02752106587092082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,2560,0.01812160015106201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,2048,0.02296746571858724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,2048,0.025209599733352663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,1536,0.01806186636288961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,5120,0.030001066128412884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,4096,0.025491199890772503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,1536,0.022921599944432578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,1536,0.013010133306185404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,1024,0.012993066509564718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,3584,0.023162666956583658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,1024,0.020333866278330483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,768,0.010607999563217164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,768,0.018972800175348917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,512,0.008190933366616566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,512,0.016312533617019655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,512,0.00783679982026418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,3072,0.020677334070205687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,256,0.004542933404445648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,256,0.01583999991416931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,128,0.015130666891733804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,2048,0.01542080044746399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,128,0.003752533346414566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,64,0.0035637333989143372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,64,0.015583999951680503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,12288,32,0.0037621334195137024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,12288,32,0.015711999932924905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,16384,0.13506347338358562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,65536,0.2800256093343099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,65536,0.4905877431233724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,16384,0.0898517370223999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,16384,0.07591466903686524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,1024,0.010642133156458537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,12288,0.06466986735661825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,12288,0.12040533224741619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,12288,0.05918613274892172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,10240,0.08857279618581136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,10240,0.05591786702473959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,768,0.009520000219345093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,10240,0.05065813461939493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,8192,0.0758453369140625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,8192,0.04820693333943685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,8192,0.04165866772333781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,7168,0.0564298669497172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,7168,0.04343893527984619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,256,0.006442666550477346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,12288,128,0.005860266586144766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,6144,0.04893546501795451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,6144,0.04029119809468587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,5120,0.04074560006459554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,5120,0.036184533437093096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,65536,0.278219731648763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,4096,0.03381653229395549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,4096,0.031890134016672775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,4096,0.02328426639238993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,3584,0.029918932914733888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,3584,0.031666133801142374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,7168,0.03583360115687052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,3072,0.0264138658841451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,3072,0.02757866581281026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,2560,0.022453333934148154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,2560,0.024988800287246704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,6144,0.031438932816187544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,2048,0.01872106591860453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,2048,0.02304853399594625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,1536,0.014800000190734863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,1536,0.022177066405614218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,1024,0.01111893355846405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,5120,0.02768426736195882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,1024,0.019718400637308755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,768,0.008949333429336548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,768,0.018236800034840902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,768,0.008525866270065307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,512,0.007275733351707459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,512,0.01637226641178131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,512,0.0071712002158164975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,256,0.004229333500067393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,3584,0.021260799964269002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,256,0.015523200233777365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,128,0.0037600000699361167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,128,0.015191466609636942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,3072,0.018947199980417887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,128,0.005376000205675761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,64,0.0035125332574049628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,64,0.01535040040810903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,10240,32,0.0036085332433382668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,2560,0.016453333695729575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,10240,32,0.01532586713631948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,2048,0.014101333419481912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,1536,0.011621333161989848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,65536,0.3937450726826986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,1024,0.009710933764775593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,16384,0.06525760094324748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,65536,0.2242527961730957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,16384,0.10411840279897053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,16384,0.06585066715876262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,12288,0.08388799826304118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,10240,256,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,12288,0.05469333330790201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,10240,0.06416426499684652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,10240,0.04729493459065755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,8192,0.049534932772318525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,8192,0.04109546740849813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,7168,0.04376213153203328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,7168,0.03762666781743367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,6144,0.03849386771519979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,6144,0.034856534004211424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,65536,0.2419146696726481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,5120,0.032798933982849124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,5120,0.03138773242632548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,4096,0.027093333005905152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,4096,0.02749119997024536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,3584,0.024146133661270143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,3584,0.025796266396840413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,12288,0.05061013301213583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,10240,0.04368533293406169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,3072,0.021185066302617392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,3072,0.02423680027325948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,8192,0.03365333477656047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,3072,0.016847999890645345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,2560,0.01824959913889567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,2560,0.022797866662343343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,2048,0.014962133765220643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,7168,0.0308896005153656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,2048,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,1536,0.011826133728027344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,1536,0.02011093298594157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,6144,0.02695786754290263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,1024,0.00890239973862966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,1024,0.018206934134165444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,768,0.007578666508197785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,5120,0.023322665691375734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,768,0.015782399972279867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,768,0.008277333279450735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,512,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,512,0.015796266992886863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,256,0.004074666649103165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,256,0.015543466806411744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,256,0.006324266890684764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,128,0.003509333233038584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,128,0.015108266472816467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,4096,0.02020053267478943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,64,0.003335466732581457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,64,0.015546666582425437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,8192,32,0.003642666588226954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,8192,32,0.015426133076349893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,3584,0.01881386637687683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,2560,0.014936533570289613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,65536,0.3604650815327962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,2048,0.01285546620686849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,65536,0.2130634625752767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,16384,0.10540586312611896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,16384,0.05965119997660319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,16384,0.06878080368041992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,12288,0.06367040077845255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,12288,0.04945813417434693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,10240,0.05369386672973633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,1536,0.011143466830253601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,10240,0.04357866843541463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,1024,0.009174399574597676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,8192,0.04471893310546875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,8192,0.038039465745290116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,512,0.007227733234564463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,8192,0.03551679849624634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,7168,0.03947306474049886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,8192,128,0.0057781333724657696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,7168,0.03521600166956584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,6144,0.03471680084864299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,5120,0.029046400388081865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,6144,0.03279786705970764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,6144,0.0273034671942393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,65536,0.24703359603881836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,5120,0.02961066762606303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,12288,0.051431465148925784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,4096,0.024452267090479533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,4096,0.027423999706904095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,10240,0.04550506671269734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,3584,0.021502933899561563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,3584,0.0248906672000885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,3072,0.019230933984120686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,3072,0.0232586661974589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,3072,0.017076265811920167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,2560,0.016500266393025716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,2560,0.02180160085360209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,2048,0.013525333007176718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,2048,0.02083946665128072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,7168,0.03180586695671082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,1536,0.010866133371988933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,1536,0.01927893360455831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,1024,0.008310399949550629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,1024,0.017046399911244712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,5120,0.02429973284403483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,768,0.007186133166154225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,768,0.01606933375199636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,4096,0.020822399854660036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,3584,0.01887786587079366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,512,0.004665599763393402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,512,0.01590933303038279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,256,0.0037216000258922578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,256,0.015333333611488342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,128,0.0034346667428811393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,128,0.015304533640543619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,2560,0.015196800231933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,64,0.0033621333539485933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,64,0.015252266327540079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,7168,32,0.0034826666116714476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,2048,0.012877866625785828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,7168,32,0.015175466736157736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,1536,0.0105621337890625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,65536,0.2957749366760254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,65536,0.18268799781799316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,16384,0.09165439605712891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,1024,0.008518399794896443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,16384,0.05502613385518392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,16384,0.06181333462397257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,12288,0.06586560010910034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,12288,0.04598613182703654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,10240,0.05555306673049927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,10240,0.04069013198216756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,8192,0.03829973141352336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,768,0.007750399907430013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,8192,0.03589653174082438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,7168,0.03389546473821004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,7168,0.032612266143163045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,512,0.006669866542021434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,256,0.005869866907596588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,7168,0.030614399909973146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,6144,0.03025280038515727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,7168,128,0.005256533126036326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,6144,0.030456533034642536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,5120,0.025663999716440837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,5120,0.02730773289998372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,4096,0.021706666549046835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,65536,0.24291520118713378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,4096,0.024990934133529662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,4096,0.0197760005791982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,3584,0.02158720095952352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,12288,0.04948480129241943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,10240,0.04331200122833252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,3584,0.023517866929372154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,8192,0.03510080178578694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,3584,0.01816213329633077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,3072,0.01694186727205912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,3072,0.022835199038187662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,6144,0.02802986701329549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,5120,0.0230240007241567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,2560,0.01439466675122579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,2560,0.021398399273554483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,2048,0.01232319970925649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,2048,0.020292266209920248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,2048,0.012475732962290447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,1536,0.009986133376757304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,1536,0.01881813406944275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,1024,0.007885866860548655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,1024,0.015936000148455302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,768,0.006702933212121327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,768,0.01599679986635844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,512,0.004333866635958353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,512,0.015501866738001505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,3072,0.01649386684099833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,256,0.0036629334092140196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,256,0.015131733814875283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,128,0.003651199986537298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,128,0.014876799782117209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,64,0.0031786667803923286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,64,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,2560,0.014326399564743042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,6144,32,0.0031871999303499854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,6144,32,0.015034666657447815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,65536,0.24453760782877604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,1536,0.009939199686050415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,65536,0.15514453252156574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,16384,0.0695466677347819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,16384,0.054269866148630774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,1024,0.00826453318198522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,12288,0.05457280079523722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,768,0.007259733478228251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,12288,0.04106880029042562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,512,0.0062730665008227035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,10240,0.047237332661946616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,10240,0.03659093379974365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,8192,0.039773865540822344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,8192,0.03222186764081319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,256,0.005588266750176748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,8192,0.032857600847880045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,7168,0.029164799054463703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,6144,128,0.005276800195376078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,7168,0.029876265923182172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,6144,0.025964800516764325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,6144,0.027497599522272747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,5120,0.022132267554601036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,5120,0.025464532772699992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,4096,0.018304000298182167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,65536,0.23107520739237467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,16384,0.060533332824707034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,4096,0.023588265975316366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,3584,0.016360533237457276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,3584,0.022061866521835328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,3072,0.014574933052062988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,3072,0.021185066302617392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,12288,0.047893333435058597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,2560,0.012281599640846252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,10240,0.042444801330566405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,2560,0.020137600104014077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,2048,0.010481066505114238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,2048,0.019063466787338258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,7168,0.029297065734863282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,1536,0.00871573289235433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,6144,0.025522132714589436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,1536,0.017037866512934367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,1024,0.006922666728496551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,1024,0.016042666633923848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,5120,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,768,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,4096,0.019383466243743895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,768,0.015636266271273295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,3584,0.017605332533518474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,768,0.007429333527882893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,3072,0.015782399972279867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,512,0.0040277334551016486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,512,0.015421866377194723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,256,0.0036650667587916053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,256,0.015158399939537048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,2560,0.013966932892799377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,128,0.003291733314593633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,128,0.014881066481272378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,2048,0.011771733562151592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,64,0.003110400090614955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,64,0.015003732840220132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,5120,32,0.0031061333914597826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,1536,0.00967680017153422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,5120,32,0.015108266472816467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,65536,0.13134613037109374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,65536,0.20069546699523927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,1024,0.007870933413505555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,16384,0.05335893233617147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,16384,0.04607253472010295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,16384,0.05229653517405192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,12288,0.04251093467076619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,12288,0.03682666619618734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,10240,0.036583467324574785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,10240,0.032841600974400836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,8192,0.030691200494766237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,512,0.006267733375231425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,8192,0.02876799901326497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,7168,0.027771733204523724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,256,0.005644799768924713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,7168,0.026743467648824053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,6144,0.024932267268498738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,6144,0.025044266382853193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,5120,128,0.005132799843947092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,5120,0.018710400660832724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,5120,0.023448532819747923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,4096,0.015319466590881348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,4096,0.02178666591644287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,65536,0.20258240699768065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,3584,0.015320533514022827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,12288,0.041859201590220135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,3584,0.0208512008190155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,3584,0.01539520025253296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,3072,0.01223360002040863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,10240,0.0366431991259257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,3072,0.020295466979344687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,2560,0.010709333419799804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,2560,0.019307732582092285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,8192,0.027191466093063353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,2048,0.009187199672063192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,2048,0.01839253306388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,7168,0.024861866235733034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,1536,0.00782719999551773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,1536,0.015799466768900552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,6144,0.021989333629608154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,1024,0.0060810665289560955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,1024,0.015826132893562318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,5120,0.019900800784428914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,768,0.004286933441956838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,768,0.015496533115704855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,4096,0.016203733285268147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,512,0.003868799904982249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,512,0.015366400281588236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,3072,0.013944533467292786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,2560,0.012444800138473511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,256,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,2048,0.010983467102050781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,256,0.015122133493423461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,1536,0.008770133058230083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,128,0.0032032000521818793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,128,0.014779733618100485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,64,0.003009066730737686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,1024,0.007397333284219106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,64,0.01495253344376882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,4096,32,0.0033471999069054925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,4096,32,0.015046399831771851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,768,0.00724480003118515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,65536,0.1839893341064453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,512,0.006180266539255777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,65536,0.12604693571726483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,16384,0.05015893379847208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,16384,0.04203093449274699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,256,0.005674666663010915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,4096,128,0.005275733272234599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,12288,0.038846933841705324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,10240,0.030727465947469074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,12288,0.03600746790568034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,10240,0.032808534304300946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,65536,0.18519147237141925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,7168,0.025081600745519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,8192,0.02808106740315755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,8192,0.027060266335805255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,16384,0.05045546690622965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,7168,0.025466666618982954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,6144,0.02259413401285807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,6144,0.02404586672782898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,12288,0.03951786756515503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,5120,0.016825600465138753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,10240,0.03512639999389648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,5120,0.02288960019747416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,4096,0.02102933327356974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,4096,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,8192,0.02889066735903422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,3584,0.012456533312797547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,3584,0.020435200134913126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,7168,0.024709333976109825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,3072,0.011087999741236369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,6144,0.021809067328770956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,2560,0.00983786682287852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,3072,0.019793067375818887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,2560,0.018920532862345376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,5120,0.018910932540893554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,2048,0.008648533622423809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,3584,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,4096,0.01646933356920878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,2048,0.01697173317273458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,1536,0.007438933352629344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,1024,0.005298133194446564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,1536,0.016357333461443583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,3072,0.013920000195503235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,1024,0.01581546664237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,2560,0.012215466300646464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,768,0.004500266909599304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,768,0.015666133165359496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,2048,0.010786133011182149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,512,0.003730133424202601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,512,0.015146666765213012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,256,0.014803199966748556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,512,0.0060928001999855045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,1536,0.008643200000127155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,256,0.0034143999218940735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,128,0.0031466667850812277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,128,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,64,0.002918400118748347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,64,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3584,32,0.0030303999781608583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,1024,0.007229866584142049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3584,32,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,65536,0.15548799832661947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,16384,0.0450709342956543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,768,0.007083733379840851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,65536,0.11018986701965332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,16384,0.03866666555404663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,12288,0.03354346752166748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,12288,0.033421866099039715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,256,0.005522133409976959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3584,128,0.005172266562779745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,10240,0.02874026695887248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,10240,0.029918932914733888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,8192,0.02442986567815145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,65536,0.21422826449076332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,8192,0.02717439929644267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,16384,0.055417601267496744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,7168,0.021946666638056438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,6144,0.01953279972076416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,12288,0.04322346846262614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,7168,0.024392533302307128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,10240,0.03911573489507039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,6144,0.022959999243418374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,8192,0.032009599606196086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,5120,0.016821332772572837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,5120,0.021623466412226358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,5120,0.0202346662680308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,4096,0.01448853313922882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,4096,0.02044693430264791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,7168,0.02741866707801819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,4096,0.0180074671904246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,3584,0.01135040024916331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,6144,0.023690666755040488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,3584,0.020513067642847695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,3072,0.010253866513570149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,3072,0.019234132766723634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,2560,0.00918826659520467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,2560,0.018227199713389076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,2048,0.007945600152015685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,2048,0.016988799969355265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,3584,0.016471466422080992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,1536,0.007004799942175548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,1024,0.015568000078201295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,1536,0.01651306649049123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,1536,0.009075199564297993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,768,0.01562879979610443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,1024,0.004907733201980591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,768,0.004108799993991852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,3072,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,512,0.0037471999724706015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,512,0.015190399686495461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,2560,0.013012267152468362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,256,0.015006933609644571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,256,0.0033717334270477297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,2048,0.01111466685930888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,128,0.0030954666435718536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,64,0.002915200094381968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,1024,0.007355733215808869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,3072,32,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,64,0.014904533823331198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,128,0.01474453310171763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,768,0.007000533243020375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,512,0.006061866879463196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,16384,0.036139734586079914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,3072,32,0.014805333813031516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,16384,0.03498026529947917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,12288,0.028668800989786785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,256,0.005382399757703145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,65536,0.13478612899780273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,10240,0.02500266631444295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,65536,0.09658346970876058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,3072,128,0.005085866649945577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,12288,0.030535467465718585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,10240,0.02646613319714864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,8192,0.0291103998819987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,65536,0.18626027107238768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,7168,0.023375999927520753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,8192,0.020984532435735066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,16384,0.052907733122507725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,8192,0.024119466543197632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,6144,0.022377600272496544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,12288,0.04085013469060262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,7168,0.018873600165049235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,10240,0.036304001013437906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,4096,0.011194666226704914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,6144,0.016724266608556113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,4096,0.016875733931859337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,3584,0.010331733028093974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,5120,0.013299199938774108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,5120,0.021585067113240562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,4096,0.019933867454528808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,7168,0.025975465774536133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,6144,0.023197867472966514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,3584,0.019347200791041054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,5120,0.0196341335773468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,3072,0.009292800227801006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,3072,0.018969599405924478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,2560,0.008407466610272725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,2560,0.017113600174585977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,3584,0.014961066842079162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,2048,0.00749120016892751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,2048,0.016169599692026772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,3072,0.013883733749389648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,1536,0.0061930666367212926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,2560,0.011852799852689107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,1536,0.01620266636212667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,2048,0.0100490669409434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,1536,0.008389332890510559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,1024,0.0045525332291920986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,1024,0.01574720044930776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,1024,0.007246933380762736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,768,0.004204800228277842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,768,0.015544533729553223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,768,0.0068245331446329755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,512,0.0036490666369597114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,512,0.01523413360118866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,256,0.00329066663980484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,256,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,128,0.003010133405526479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,128,0.014426666498184203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,512,0.005850666761398315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,64,0.0028938665986061097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,64,0.014873600006103516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,256,0.005288533369700114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2560,32,0.0029887999097506206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2560,32,0.014566399653752646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2560,128,0.005118933320045471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,65536,0.10594453016916912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,65536,0.07988159656524658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,16384,0.03017173409461975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,16384,0.03185919920603435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,12288,0.024245333671569825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,10240,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,12288,0.027459200223286944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,10240,0.020963199933369956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,8192,0.017388800779978432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,65536,0.18395519256591797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,16384,0.049377067883809404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,8192,0.02444266676902771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,7168,0.015892266233762106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,12288,0.03803093433380127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,7168,0.02278613249460856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,5120,0.013623467087745667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,5120,0.020478934049606323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,10240,0.03264426589012146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,4096,0.012121599912643433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,6144,0.014244266351064048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,8192,0.02563626567522685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,6144,0.021800533930460612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,7168,0.023405865828196207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,4096,0.019595734278361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,4096,0.015289599696795145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,6144,0.02063680092493693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,5120,0.017458132902781167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,3584,0.010632533828417461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,2560,0.016673066218694053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,3584,0.018284799655278523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,3072,0.009723732868830364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,3072,0.01736853321393331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,3584,0.014355199535687766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,2560,0.007799466451009114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,2560,0.01114453375339508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,3072,0.012876799702644348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,2048,0.006975999971230824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,2048,0.016821332772572837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,1536,0.005452799797058106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,1536,0.01619733373324076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,2048,0.009672533472379048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,1024,0.0043488000830014546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,1024,0.015752533078193666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,1536,0.00830080012480418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,768,0.003960533440113068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,768,0.015530666708946228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,512,0.0036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,1024,0.007176533341407776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,512,0.015387733777364096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,768,0.006908800204594929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,256,0.0032298666735490165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,256,0.014883200327555338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,64,0.014781866470972696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,256,0.0052704001466433205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,512,0.005897599955399831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,65536,0.08262613614400229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,128,0.003052799900372823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,128,0.014516266187032065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,64,0.0029418667157491045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,2048,128,0.005048533280690512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,2048,32,0.0029088000456492106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,2048,32,0.014579199751218162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,65536,0.07118079662322999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,16384,0.02444373369216919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,65536,0.16018026669820148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,16384,0.027478400866190595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,12288,0.019273600975672402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,12288,0.02525866627693176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,8192,0.021465599536895752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,16384,0.044225064913431804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,12288,0.034119466940561935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,10240,0.016781866550445557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,10240,0.02355626622835795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,8192,0.013873066504796347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,10240,0.030193066596984862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,7168,0.0127210666735967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,7168,0.022048000494639078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,7168,0.02308373252550761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,8192,0.024126933018366496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,6144,0.011524266997973124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,6144,0.022443733612696328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,5120,0.010531199971834819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,5120,0.02026559909184774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,3584,0.0176256000995636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,6144,0.019435733556747437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,4096,0.009476266304651896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,4096,0.01887680093447367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,5120,0.01646719972292582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,4096,0.013778133193651834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,3584,0.009014399846394856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,3072,0.009825066725413004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,3072,0.0172437330087026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,3584,0.012585600217183432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,2560,0.007379200061162312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,3072,0.011569066842397054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,2560,0.01676586667696635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,2048,0.006365866462389629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,2048,0.016614400347073875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,2560,0.010660266876220703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,1536,0.007720533510049183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,2048,0.008962133526802063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,1536,0.005019733309745788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,1536,0.016310399770736693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,1024,0.004214400053024292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,1024,0.015639467040697734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,768,0.0039594667653242745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,768,0.015480533242225647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,1024,0.006487466891606649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,512,0.0036629334092140196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,512,0.015281066298484802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,768,0.006320000191529592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,256,0.0033002667129039764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,512,0.005610666672388713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,256,0.014733866850535075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,128,0.003014400104681651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,128,0.014456533392270408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,65536,0.054871467749277744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,256,0.005058133105436961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1536,128,0.00491839994986852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,64,0.002811733384927114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,64,0.014663466811180114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1536,32,0.0029898665845394133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1536,32,0.014442666371663412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,65536,0.05736213525136312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,65536,0.13004906972249347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,16384,0.01830613414446513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,16384,0.024205867449442545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,12288,0.013914666573206582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,12288,0.02578666607538859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,16384,0.03736319939295451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,10240,0.012470400333404541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,12288,0.02741439938545227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,10240,0.024013866980870567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,7168,0.017875200510025023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,10240,0.02558293342590332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,8192,0.010705066720644633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,8192,0.02217493255933126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,8192,0.02116480072339376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,7168,0.010171733299891154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,7168,0.021166932582855225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,6144,0.012086400389671325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,6144,0.020538665850957236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,6144,0.016057599584261575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,5120,0.010573866963386535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,5120,0.01985599994659424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,5120,0.01381226678689321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,4096,0.009169066945711773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,4096,0.019128533204396565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,4096,0.011569066842397054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,3072,0.010078932841618855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,3584,0.0085098663965861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,3584,0.018281600872675576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,3072,0.007793066898981731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,3584,0.010808533430099488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,3072,0.01733013391494751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,2560,0.00697813332080841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,2560,0.01688213348388672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,2560,0.00918293297290802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,2048,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,2048,0.01646293302377065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,2048,0.008002133170763651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,1536,0.0049781332413355505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,1536,0.016307199994723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,768,0.005824000140031179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,1024,0.004324266811211904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,1536,0.0069930667678515124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,1024,0.015558399756749473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,256,0.003186133255561193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,768,0.0039818666875362395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,1024,0.005924266576766968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,768,0.015357866883277893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,512,0.0035594666997591654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,512,0.015003732840220132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,512,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,256,0.014696533481280008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,128,0.002977066735426585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,128,0.014406399925549826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,256,0.005015466610590617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,1024,128,0.004814933240413666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,64,0.00277866671482722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,16384,0.01411946713924408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,65536,0.10489919980367024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,64,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,1024,32,0.00290133332212766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,1024,32,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,65536,0.044158931573232016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,65536,0.05160640080769857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,16384,0.02441493272781372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,16384,0.031115732590357464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,12288,0.011815466483434041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,12288,0.022111999988555908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,12288,0.024487467606862386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,10240,0.010689066847165425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,10240,0.02143359978993734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,10240,0.02025066614151001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,8192,0.01137600044409434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,8192,0.020155733823776244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,8192,0.016025599837303162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,7168,0.010551466544469198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,7168,0.019590399662653604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,6144,0.009879466891288758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,7168,0.014750933647155762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,6144,0.018685867389043175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,6144,0.013453867038091025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,5120,0.008904533584912618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,5120,0.019211733341217042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,5120,0.011736533045768738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,4096,0.007550933460394542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,3072,0.017259732882181803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,4096,0.01839253306388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,4096,0.009869866569836934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,3584,0.007008000214894612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,3584,0.01763946612675985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,3584,0.009433600306510925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,2048,0.016235733032226564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,3072,0.00691840002934138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,3072,0.008775466680526733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,2560,0.00625493327776591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,2560,0.01678933302561442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,2048,0.005479466418425242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,2560,0.008035199840863545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,1024,0.005786666770776113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,2048,0.007180800040562947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,768,0.015212800105412802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,1536,0.0047978664437929785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,1536,0.015843199690183003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,1536,0.006669866542021434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,1024,0.004187733431657155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,256,0.0031189332405726117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,1024,0.015702399611473083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,256,0.004969599843025208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,768,0.0038272000849246977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,768,0.0056309332450230915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,512,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,512,0.015236266454060874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,512,0.0052373334765434265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,256,0.014658133188883463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,128,0.002915200094381968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,128,0.014472533265749613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,768,128,0.004749866823355356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,64,0.0028512001037597655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,64,0.014454399545987448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,768,32,0.002809600035349528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,768,32,0.01446613371372223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,65536,0.03259413242340088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,65536,0.04535040060679118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,65536,0.09725120067596435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,16384,0.012859732906023661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,16384,0.022249599297841392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,16384,0.025356799364089966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,12288,0.009962667028109233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,12288,0.021871999899546305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,12288,0.020644267400105797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,10240,0.01032533347606659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,10240,0.02069013317426046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,8192,0.009060266613960265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,8192,0.019153066476186118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,6144,0.01874666611353556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,6144,0.009252267082532246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,10240,0.01754986643791199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,7168,0.008914132912953694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,7168,0.019293866554896035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,7168,0.013255467017491659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,8192,0.014647466937700906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,6144,0.011748266220092774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,5120,0.008061866462230682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,5120,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,5120,0.010568533341089885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,4096,0.007153066496054332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,4096,0.01795413295427958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,4096,0.00938986639181773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,2560,0.0061951999862988796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,3584,0.006916266679763794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,3584,0.01765120029449463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,3584,0.009129599730173747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,3072,0.006445866823196411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,3072,0.017435733477274576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,3072,0.008215466638406117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,2560,0.016786134243011473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,2048,0.005481599768002828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,1024,0.004159999887148539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,2048,0.016272000471750894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,2048,0.006932266553243001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,1536,0.004725333551565806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,2560,0.007979733248551685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,1536,0.01609599987665812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,1024,0.015713066856066386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,1024,0.005676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,768,0.0040554667512575785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,768,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,1536,0.006542933483918508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,256,0.004829866687456766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,512,0.0034645333886146545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,512,0.014918399850527444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,768,0.005416533350944519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,512,0.005223466455936432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,256,0.0030613332986831666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,256,0.014692266782124838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,128,0.0029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,128,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,512,128,0.004743466774622599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,64,0.002792533238728841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,64,0.014577066898345948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,512,32,0.002775466690460841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,512,32,0.01458453337351481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,65536,0.020286933581034342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,65536,0.03660159905751546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,65536,0.0859882672627767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,16384,0.009289600451787313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,10240,0.01930239995320638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,16384,0.02019946575164795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,10240,0.01671573321024577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,8192,0.00691840002934138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,8192,0.018857600291570027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,12288,0.00883733332157135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,12288,0.01957119901974996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,10240,0.008024533092975617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,7168,0.006583466629187266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,7168,0.019012266397476198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,16384,0.02371413310368856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,12288,0.019010132551193236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,6144,0.00621973325808843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,6144,0.018279467026392618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,6144,0.011689600348472596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,5120,0.00647680014371872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,5120,0.018532266219456993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,4096,0.006178133189678192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,3584,0.017831466595331826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,8192,0.014389333128929139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,7168,0.013225600123405457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,4096,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,4096,0.009547733267148336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,3584,0.006586666901906331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,5120,0.01050986647605896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,3584,0.008987733721733093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,3072,0.006188799937566122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,3072,0.017092265685399375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,2560,0.006017066538333893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,2560,0.016617600123087564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,2560,0.007830399771531422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,2048,0.005329066514968872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,3072,0.008182399968306223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,2048,0.016203733285268147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,1536,0.0067104001839955645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,2048,0.006964266796906789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,1536,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,1536,0.015918933351834617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,1024,0.004031999905904134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,1024,0.015479466319084168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,768,0.003667200108369192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,768,0.015254400173823037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,768,0.005438933273156484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,512,0.0033290666838486993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,1024,0.005636266867319743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,512,0.014941866199175516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,512,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,256,0.002995199958483378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,256,0.01495146652062734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,256,0.004863999783992767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,128,0.0028319999575614927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,128,0.014318933089574179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,256,128,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,64,0.0026389333109060925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,64,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,256,32,0.002701866626739502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,256,32,0.014492799838383993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,65536,0.03171839912732442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,65536,0.012487467130025227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,65536,0.08601600329081217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,16384,0.007161599894364675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,16384,0.02083946665128072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,12288,0.006921599805355072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,12288,0.019398399194081626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,12288,0.019025067488352455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,10240,0.006512000163396199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,16384,0.024179200331370033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,10240,0.018978132804234823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,10240,0.016807466745376587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,8192,0.006353066861629486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,8192,0.018433066209157307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,8192,0.014324266711870828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,7168,0.006215466558933258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,7168,0.019205333789189656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,7168,0.013133866588274637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,6144,0.005996799965699514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,6144,0.018513067563374837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,6144,0.011939199765523274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,5120,0.006437333424886067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,5120,0.018679465850194296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,5120,0.010520533720652262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,4096,0.006072533130645752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,4096,0.01798080007235209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,4096,0.009282132983207703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,3584,0.0065290664633115125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,3584,0.017452800273895265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,3072,0.006242133180300395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,3072,0.01713706652323405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,3072,0.008201600114504496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,2560,0.006020266811052958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,3584,0.0090421328941981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,1536,0.01588586668173472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,2560,0.01660160024960836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,2560,0.007795199751853943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,2048,0.005365333457787832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,2048,0.016038399934768677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,2048,0.00687360018491745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,1536,0.00472320020198822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,1024,0.0040554667512575785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,1024,0.015442132949829102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,512,0.003365333378314972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,1536,0.0066453332702318835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,768,0.003622400015592575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,768,0.015668267011642457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,512,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,256,0.0030080000559488933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,512,0.00506986677646637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,1024,0.005787733197212219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,256,0.01454080045223236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,256,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,128,0.0028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,768,0.005363200108210246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,128,0.014430933197339377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,80,128,128,0.004750933249791463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,65536,0.029471999406814574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,16384,0.00624533345301946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,64,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,64,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,128,32,0.002677333354949951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,128,32,0.014324266711870828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,65536,0.009880533814430237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,12288,0.006179200112819671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,16384,0.020062933365503945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,12288,0.019407999515533448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,10240,0.006347733239332835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,10240,0.01985493302345276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,8192,0.006209066510200501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,8192,0.018629332383473717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,7168,0.006128000219662985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,7168,0.01931519905726115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,6144,0.005998933315277099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,6144,0.01869759956995646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,5120,0.006324266890684764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,5120,0.01902186671892802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,4096,0.005862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,4096,0.017670400937398276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,3584,0.006405333181222279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,3584,0.01722773313522339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,3072,0.006107733150323232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,3072,0.01683093309402466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,2560,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,2560,0.01653439998626709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,2048,0.005348266661167144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,2048,0.01597653329372406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,1536,0.0046741331617037455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,1536,0.015889066457748412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,1024,0.004081066697835922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,1024,0.015082666277885437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,128,0.0029088000456492106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,768,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,768,0.015099733074506124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,512,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,512,0.014773333072662353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,256,0.0030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,256,0.014615466197331747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,128,0.014437333742777506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,64,0.0026911998788515727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,64,0.014591999848683677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,64,32,0.002733866622050603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,64,32,0.014328533411026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,65536,0.011339733004570007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,65536,0.02911253372828166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,10240,0.01869973341623942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,16384,0.00636053333679835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,16384,0.020414932568868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,12288,0.0061247999469439185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,12288,0.01880853374799093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,10240,0.006330666442712148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,8192,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,8192,0.018573866287867228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,7168,0.006088533500830332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,7168,0.018658133347829182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,6144,0.005891199906667074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,6144,0.018599466482798258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,5120,0.006309333443641663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,5120,0.018900267283121743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,4096,0.005829333265622457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,4096,0.017629865805308023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,3584,0.006247466802597046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,3584,0.01769919991493225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,3072,0.005947733422120413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,3072,0.016993065675099693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,2560,0.0059914668401082356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,2560,0.016826667388280234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,2048,0.005369600156943003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,2048,0.016225066781044007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,1536,0.004691199958324432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,1536,0.015867732961972556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,1024,0.0040287998815377556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,1024,0.015527466932932535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,768,0.0036415999134381616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,768,0.01504746675491333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,512,0.003369600077470144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,512,0.014759467045466105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,256,0.0030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,256,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,128,0.002775466690460841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,128,0.01442346672217051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,64,0.0026933332284291584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,64,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,80,32,32,0.002656000107526779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,80,32,32,0.01420906682809194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,16384,0.40726931889851886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,16384,0.7700000127156575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,12288,0.5920149485270183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,12288,0.3111733436584473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,10240,0.4927050590515137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,10240,0.2636906623840332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,8192,0.3938783963521322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,8192,0.21522347132364908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,7168,0.19125332832336425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,7168,0.34483200709025064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,6144,0.2970730781555176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,6144,0.16685546239217122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,5120,0.24886080423990883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,5120,0.1411413351694743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,4096,0.19952319463094076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,4096,0.11642666657765706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,16384,0.3682197252909342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,3584,0.17750825881958007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,12288,0.2784543991088867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,3584,0.10557226339975993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,3072,0.15251946449279785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,3072,0.0935103972752889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,10240,0.23486293156941734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,8192,0.18957014083862306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,7168,0.16698026657104492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,2560,0.15029865900675457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,2560,0.08658560117085776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,2048,0.10393706957499187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,2048,0.06760533650716147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,6144,0.14704747200012208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,1536,0.08062613010406494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,1536,0.06330026785532633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,1024,0.05597440004348755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,1024,0.04429653485616048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,5120,0.12298773129781086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,768,0.05023573239644369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,768,0.03816959857940674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,4096,0.1007530689239502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,512,0.035046398639678955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,512,0.030253867308298748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,3072,0.07803946336110433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,256,0.020175999402999877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,512,0.020950400829315187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,256,0.022428800662358604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,128,0.010500267148017883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,128,0.019985065857569376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,2048,0.05488640069961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,768,0.026346667607625322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,64,0.008020266890525818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,65536,32,0.006437333424886067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,64,0.018168532848358156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,65536,32,0.018465065956115724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,1024,0.03247146606445313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,3584,0.08907306989034017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,65536,0.4218570709228516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,65536,0.7554367701212565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,16384,0.19957332611083983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,16384,0.12730666796366374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,12288,0.14648106892903645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,12288,0.0922048012415568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,10240,0.1222218672434489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,10240,0.07969066301981607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,2560,0.07455466588338217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,8192,0.10260799725850422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,8192,0.0666645328203837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,128,0.01241919994354248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,256,0.014306132992108664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,7168,0.08635733127593995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,7168,0.06041706800460815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,6144,0.07646933396657309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,6144,0.05391786495844523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,65536,1536,0.04332053263982137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,5120,0.0627573331197103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,5120,0.04827626546223958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,16384,0.11480639775594075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,65536,0.37138665517171227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,12288,0.07592746416727701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,4096,0.05083946784337362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,4096,0.0451904018719991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,3584,0.04494293530782063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,3584,0.03892800013224284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,10240,0.06475840012232462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,8192,0.05312106609344482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,3072,0.039410134156545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,3072,0.034385065237681076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,2560,0.03322346607844035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,2560,0.031011199951171874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,7168,0.04728533426920573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,2048,0.027383466561635334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,6144,0.042070400714874265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,2048,0.027537065744400024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,1536,0.02163413365681966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,1536,0.024027733008066814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,5120,0.03616960048675537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,1024,0.015378133455912272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,1024,0.021334399779637657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,768,0.011891200145085653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,768,0.019684267044067384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,4096,0.030154667297999066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,512,0.009038933118184407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,512,0.018203733364741008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,3072,0.024638932943344117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,256,0.006171733140945435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,256,0.015874133507410685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,768,0.010331733028093974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,128,0.00402453343073527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,128,0.01542080044746399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,2048,0.018147200345993042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,64,0.003772799919048945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,64,0.01535040040810903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,1024,0.011517866452534994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,16384,32,0.0038794666528701783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,16384,32,0.015799466768900552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,3584,0.02757013241449992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,65536,0.5791072209676107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,2560,0.02148800094922384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,65536,0.31370452245076497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,16384,0.08809066613515218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,16384,0.16578666369120282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,12288,0.10754559834798176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,12288,0.07091519832611085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,10240,0.09004800319671631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,10240,0.06132693290710449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,512,0.008725333213806152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,8192,0.07292586962381999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,8192,0.05240213473637899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,256,0.006090666850407918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,128,0.005910400052865346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,7168,0.06416320006052653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,7168,0.048259198665618896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,6144,0.056364798545837404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,16384,1536,0.014820266763369241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,6144,0.04342720111211141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,5120,0.047251200675964354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,5120,0.03890560070673625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,12288,0.05892693201700846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,4096,0.03856533368428548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,4096,0.033973332246144614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,16384,0.0846442699432373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,3584,0.034519465764363606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,65536,0.28314453760782876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,3584,0.03187413414319356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,3072,0.030135466655095415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,3072,0.02947946588198344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,10240,0.049907199541727704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,2560,0.02910933295885722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,2560,0.026587732632954914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,8192,0.041929598649342856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,2048,0.02103360096613566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,2048,0.024312533934911094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,7168,0.03734506766001384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,1536,0.016938666502634682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,1536,0.022188800573349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,6144,0.03293333252271016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,1024,0.011580800016721089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,1024,0.01943146586418152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,5120,0.028518400589625043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,768,0.009598933657010396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,768,0.01833066741625468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,3072,0.01946453253428141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,512,0.007366399963696797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,4096,0.02405760089556376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,512,0.015659733613332113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,256,0.004026666780312856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,256,0.01567893326282501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,2560,0.017018665870030723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,128,0.00363520011305809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,128,0.015177599589029946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,3584,0.02434773246447245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,64,0.00347626656293869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,64,0.015213867028554281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,12288,32,0.003622400015592575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,12288,32,0.015375999609629312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,65536,0.4866645177205403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,1536,0.012006400028864543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,65536,0.27971839904785156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,16384,0.12181759675343831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,16384,0.0765343983968099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,1024,0.009886933366457622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,2048,0.01418880025545756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,12288,0.08932480017344156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,12288,0.061544533570607504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,768,0.008835200468699138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,256,0.005464533468087515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,128,0.005167999863624572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,12288,512,0.006262399752934774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,10240,0.07435626983642578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,10240,0.053956266244252524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,8192,0.06049173275629679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,8192,0.04662079811096191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,7168,0.053291734059651694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,7168,0.04270506699879964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,6144,0.046972799301147464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,6144,0.0387231985727946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,5120,0.03949013153711955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,5120,0.0350655992825826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,65536,0.24018774032592774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,4096,0.037164799372355145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,4096,0.03138239979743958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,16384,0.06563839912414551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,12288,0.05092266798019409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,3584,0.030590933561325074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,3584,0.028779733180999755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,3072,0.02535360058148702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,3072,0.026715733607610065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,10240,0.043067733446757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,2560,0.021917865673700968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,6144,0.028446932633717854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,2560,0.024530132611592613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,2048,0.018143999576568603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,7168,0.03209493358929952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,2048,0.022788266340891518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,8192,0.036124801635742186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,1536,0.01423679987589518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,1536,0.02078933318456014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,1024,0.010167466600735982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,5120,0.0247381329536438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,1024,0.018771199385325114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,768,0.008449066678682964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,768,0.01748266617457072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,3072,0.01670080025990804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,512,0.006634666522343953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,512,0.015108266472816467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,4096,0.020882133642832437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,256,0.003929600119590759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,256,0.015398400028546652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,3584,0.01885333259900411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,128,0.0035775999228159585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,128,0.014811733365058899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,2560,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,64,0.0034229333202044168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,64,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,10240,32,0.003565866748491923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,10240,32,0.015032533804575601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,1536,0.010875733693440755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,65536,0.38560638427734373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,2048,0.012506666779518127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,65536,0.21821440060933434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,768,0.007463466624418895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,16384,0.09408960342407227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,16384,0.06458986600240071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,12288,0.07192959785461425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,12288,0.05306666692097982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,10240,0.06024959882100424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,10240,0.046838398774464926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,1024,0.009040000041325887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,512,0.005588266750176748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,256,0.005201066533724466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,8192,0.049353599548339844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,8192,0.04026666482289632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,10240,128,0.00498879998922348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,7168,0.04351253509521484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,7168,0.037164799372355145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,6144,0.03854293425877889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,6144,0.03401279846827189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,5120,0.032433066765467325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,5120,0.03097813328107198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,65536,0.19490666389465333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,4096,0.03049813310305278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,16384,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,12288,0.04212693373362224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,4096,0.027511467536290485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,3584,0.023827199141184488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,10240,0.03606293201446533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,3584,0.025849600632985432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,8192,0.030525867144266767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,3072,0.0244704008102417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,3072,0.020985599358876547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,7168,0.026984532674153645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,2560,0.018040533860524496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,2560,0.02251733342806498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,2048,0.01493013302485148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,2048,0.020972800254821778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,6144,0.02390506664911906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,1536,0.011688533425331115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,1536,0.020115200678507486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,1024,0.008737066388130188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,1024,0.018039466937383015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,5120,0.02085439960161845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,768,0.007529599964618683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,768,0.015638400117556253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,4096,0.0175872008005778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,512,0.006002133091290792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,512,0.01566933294137319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,3072,0.014321066935857139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,256,0.0038421332836151125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,3584,0.01641279955705007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,256,0.015336533387502035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,2048,0.01138879954814911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,128,0.00344106654326121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,1024,0.00817920019229253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,128,0.015043200055758158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,64,0.0033802665770053864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,64,0.015132799744606018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,8192,32,0.0034815999368826545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,8192,32,0.01523413360118866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,2560,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,65536,0.35151147842407227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,65536,0.20515626271565757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,16384,0.1035925308863322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,16384,0.05982186794281006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,12288,0.06973333358764648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,1536,0.009924266735712688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,12288,0.04883519808451335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,10240,0.053597867488861084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,10240,0.04345386823018392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,768,0.006285866598288219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,512,0.005621333420276642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,8192,0.043449600537618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,8192,0.03815466562906901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,256,0.005374933282534281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,8192,128,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,7168,0.03812266588211059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,7168,0.03505813280741374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,6144,0.03393813371658325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,6144,0.032570666074752806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,5120,0.028675200541814168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,5120,0.0294922669728597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,12288,0.03877546787261963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,65536,0.17744639714558919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,4096,0.025214932362238568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,16384,0.04941333134969075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,4096,0.02700693408648173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,3584,0.021331199010213218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,3584,0.024709333976109825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,3072,0.01935466726620992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,3072,0.02307413419087728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,10240,0.03342613379160563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,2560,0.016148266196250916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,2560,0.021874133745829263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,6144,0.02214933236440023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,2048,0.013846400380134582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,8192,0.027831466992696126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,2048,0.02039360006650289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,7168,0.024897066752115886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,1536,0.01067733367284139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,1536,0.01930346687634786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,1024,0.008251733581225077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,1024,0.017517866690953572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,5120,0.01914560000101725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,768,0.007206400235493978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,768,0.01583146651585897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,4096,0.01608746647834778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,3072,0.013140267133712769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,512,0.0047882666190465295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,3584,0.014761599898338317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,2560,0.012119467059771221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,512,0.015457066893577575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,256,0.0037600000699361167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,256,0.01513813336690267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,2048,0.010573866963386535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,1536,0.009044266740481059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,128,0.0034175999462604523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,128,0.015043200055758158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,768,0.005866666634877523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,1024,0.0069482664267222095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,64,0.0031445334355036415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,64,0.015260799725850423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,7168,32,0.0032597333192825317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,7168,32,0.014939733346303306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,65536,0.2948650677998861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,65536,0.18188907305399576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,16384,0.07788586616516113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,16384,0.05478399991989136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,12288,0.06023573478062948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,12288,0.04457813501358032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,512,0.0053951998551686605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,10240,0.05128000179926554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,10240,0.039545599619547525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,8192,0.03969599803288777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,8192,0.034764798482259114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,7168,0.034686934947967527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,256,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,7168,0.032577067613601685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,6144,0.030769066015879316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,7168,128,0.004901333153247834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,6144,0.02996586759885152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,5120,0.02542826731999715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,5120,0.027090134223302205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,65536,0.15729600588480633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,12288,0.03471999963124593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,4096,0.02114560008049011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,16384,0.049693866570790605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,4096,0.024626133839289348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,3584,0.018810667594273887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,10240,0.029738666613896687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,3584,0.023568000396092734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,6144,0.019925334056218467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,3072,0.01665173371632894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,3072,0.02217386762301127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,8192,0.024855466683705647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,7168,0.022296533981959025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,2560,0.01418880025545756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,2560,0.021057067314783733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,2048,0.011954133709271748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,2048,0.019764266411463418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,1536,0.009714133540789286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,1536,0.01858453353246053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,5120,0.017171200116475424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,1024,0.007712000111738841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,1024,0.015929599603017174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,768,0.006589866677920024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,768,0.015820800264676412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,4096,0.014677332838376364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,3072,0.011979732910792034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,512,0.004353066782156626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,512,0.015561599532763162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,256,0.003568000098069509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,256,0.015227733055750528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,1536,0.008529067039489746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,128,0.0032159999012947083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,128,0.014781866470972696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,3584,0.013464533289273582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,64,0.0031061333914597826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,2560,0.011121066411336263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,64,0.014798933267593383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,6144,32,0.003202133377393087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,6144,32,0.01477120021979014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,2048,0.009754666686058044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,65536,0.14591573079427084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,65536,0.25076373418172204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,16384,0.06458239952723185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,768,0.005523199836413065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,16384,0.055181864897410074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,1024,0.006153599917888641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,12288,0.05085759957631429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,12288,0.0400053342183431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,10240,0.04332480033238729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,10240,0.035595734914143876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,8192,0.036115201314290364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,8192,0.03142720063527425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,256,0.004925866425037384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,7168,0.02910826603571574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,128,0.004631466666857402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,6144,512,0.005127466718355815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,7168,0.02982826630274455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,6144,0.025811199347178144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,6144,0.02752853234608968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,5120,0.02204586664835612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,5120,0.024940800666809083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,12288,0.030983465909957885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,65536,0.13941760063171388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,4096,0.01832746664683024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,4096,0.023192532857259116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,16384,0.039188265800476074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,3584,0.016335999965667723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,3584,0.021961599588394165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,3072,0.014270933469136557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,3072,0.02095680038134257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,10240,0.026743467648824053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,2560,0.012322133779525757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,2560,0.020247467358907065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,8192,0.02218559980392456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,2048,0.010315733154614766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,2048,0.01927466591199239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,6144,0.01771946748097738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,1536,0.00874773363272349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,1536,0.017577600479125977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,7168,0.020193066199620566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,1024,0.006950399776299794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,1024,0.015633066495259605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,5120,0.015473066767056783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,768,0.0052597333987553915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,768,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,3072,0.011188266674677531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,512,0.003922133396069208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,4096,0.013409066200256347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,512,0.015675733486811318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,256,0.0034730667869249977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,3584,0.012404266993204753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,256,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,2560,0.010436266660690308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,128,0.003270400067170461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,128,0.014888532956441245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,1536,0.007650133470694225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,64,0.003009066730737686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,2048,0.009196799993515015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,64,0.014914133151372275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,5120,32,0.00306986669699351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,5120,32,0.014855466286341348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,65536,0.1969578742980957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,1024,0.005779199798901876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,65536,0.12625919977823893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,768,0.005413333574930826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,16384,0.05641706784566244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,16384,0.04368106524149577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,512,0.005176533261934916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,256,0.004840533435344696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,12288,0.0413696010907491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,5120,128,0.00472320020198822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,12288,0.03616960048675537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,10240,0.035485867659250894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,10240,0.03230080008506775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,8192,0.029655466477076214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,8192,0.028854399919509888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,7168,0.026819199323654175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,7168,0.026494934161504106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,6144,0.02165973385175069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,6144,0.024758400519688924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,5120,0.01855573256810506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,5120,0.023129600286483764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,65536,0.11379093329111736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,4096,0.017171200116475424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,4096,0.022293333212534586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,16384,0.03272533416748047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,3584,0.013425067067146301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,3584,0.020887466271718343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,12288,0.025972266991933186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,3072,0.012121599912643433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,3072,0.02023893396059672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,10240,0.022261333465576173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,8192,0.018866133689880372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,2560,0.010470400253931682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,2560,0.019077332814534505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,2048,0.009114666779836019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,2048,0.018205867211023966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,7168,0.01680319905281067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,6144,0.015102932850519816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,1536,0.007683200140794118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,1536,0.016824533541997276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,1024,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,5120,0.013358933726946512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,1024,0.015800533692042033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,768,0.004320000112056732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,768,0.01536853313446045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,4096,0.011758933464686077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,512,0.003773866593837738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,512,0.01528320014476776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,3072,0.01018986701965332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,3584,0.01112000048160553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,256,0.0033088001112143196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,256,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,128,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,128,0.014663466811180114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,2048,0.008220799763997396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,1024,0.0055754666527112326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,768,0.005401599903901418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,2560,0.009544533491134644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,64,0.0029898665845394133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,1536,0.006628266473611195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,64,0.014620799819628397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,4096,32,0.0029738667110602063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,4096,32,0.014632532993952433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,65536,0.16935680707295736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,512,0.005273599922657013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,65536,0.11305812994639079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,16384,0.05104853312174479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,16384,0.0419648011525472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,12288,0.03541546662648519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,256,0.00496319979429245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,12288,0.038973867893218994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,10240,0.03302399913469951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,8192,0.0273632009824117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,4096,128,0.004783999919891357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,10240,0.030246400833129884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,8192,0.026451200246810913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,7168,0.024915200471878052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,7168,0.025130667289098102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,6144,0.02211093306541443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,6144,0.024052266279856363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,5120,0.016546133160591125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,5120,0.02235413392384847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,65536,0.10475200017293293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,4096,0.014833066860834757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,4096,0.02114026745160421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,16384,0.0341045339902242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,3584,0.012225066622098286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,12288,0.024115200837453207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,3584,0.02033066749572754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,3072,0.011054933071136475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,3072,0.01978666583697001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,10240,0.020746666193008422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,2560,0.009913600484530131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,2560,0.018761599063873292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,8192,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,7168,0.01560533344745636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,2048,0.008730666836102803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,6144,0.014114133516947427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,2048,0.017105066776275636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,5120,0.012548266847928365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,1536,0.0073749333620071415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,1536,0.016006400187810264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,1024,0.005218133330345154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,1024,0.015657599767049155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,4096,0.011373866597811382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,768,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,768,0.01579839984575907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,3072,0.009784533580144247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,512,0.003714133302370707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,512,0.015251200397809347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,3584,0.010619733730951946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,256,0.0032490665713946023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,256,0.014793599645296732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,128,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,128,0.014548266927401224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,2560,0.00920853316783905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,64,0.002890666574239731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,64,0.014737066626548768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3584,32,0.002917333443959554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3584,32,0.014802133043607077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,2048,0.007490133245786031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,65536,0.14695146878560383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,65536,0.10212799708048503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,16384,0.04197973410288493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,16384,0.03829866647720337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,1024,0.005579733351866404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,12288,0.03313493331273397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,768,0.005285333096981049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,12288,0.03127253254254659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,1536,0.00631466656923294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,10240,0.029049599170684816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,512,0.0051018665234247845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,10240,0.02802773316701253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,256,0.004857600231965383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,8192,0.02358400026957194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,8192,0.025235199928283693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,7168,0.021176532904307047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,7168,0.023745065927505492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,6144,0.017195733388264973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3584,128,0.004654933512210846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,6144,0.02302293380101522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,5120,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,5120,0.021679999430974324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,65536,0.10678826967875163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,4096,0.013397333025932313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,4096,0.020466132958730062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,16384,0.030306132634480794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,3584,0.011203199625015259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,3584,0.0197269340356191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,12288,0.023851732412974037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,3072,0.010099200407663982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,3072,0.01885546644528707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,10240,0.02053119937578837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,6144,0.013897599776585898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,2560,0.009096533060073853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,7168,0.015593600273132325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,2560,0.018168532848358156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,2048,0.008025600016117096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,8192,0.01730453372001648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,2048,0.016925867398579916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,1536,0.006844800213972728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,1536,0.016173866391181946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,1024,0.004680533210436503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,1024,0.0157258669535319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,5120,0.012590932846069335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,768,0.004093866546948751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,768,0.01530346671740214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,4096,0.011091199517250062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,3072,0.009504000345865887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,512,0.0037237333754698435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,3584,0.010491733749707539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,512,0.014958932995796204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,256,0.0032373333970705668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,256,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,128,0.0030037333567937215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,128,0.014564266800880432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,1536,0.006140799820423126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,64,0.0029663999875386557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,2560,0.00848640004793803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,64,0.014857600132624308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,3072,32,0.0029887999097506206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,3072,32,0.014705066879590353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,1024,0.005603200197219849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,2048,0.00699946681658427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,65536,0.12244373162587482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,65536,0.08853013515472412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,768,0.005312000215053558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,16384,0.03538879950841268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,16384,0.03435946702957153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,12288,0.028125866254170732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,12288,0.02894933422406514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,10240,0.024576000372568765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,10240,0.026225066184997557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,8192,0.020482132832209267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,8192,0.02405973275502523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,512,0.005028266708056132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,256,0.0047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,7168,0.017041067282358803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,3072,128,0.004632533093293508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,7168,0.023373866081237794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,6144,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,6144,0.021846399704615275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,65536,0.10116693178812664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,12288,0.022524799903233847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,5120,0.013108266393343606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,16384,0.028510934114456175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,5120,0.021312000354131062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,4096,0.010925867160161336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,4096,0.020415999492009482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,3584,0.010182399551073711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,3584,0.019372800985972084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,3072,0.009351467092831928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,10240,0.01947093407313029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,3072,0.018381865819295247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,2560,0.008344533046086629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,2560,0.018199467658996583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,8192,0.016424533724784852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,2048,0.007309866448243459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,2048,0.01619733373324076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,6144,0.013398399949073792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,7168,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,1536,0.0061471998691558834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,1536,0.016385066509246825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,1024,0.004459733267625173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,1024,0.015801599621772765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,4096,0.010739200313886007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,5120,0.012150399883588155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,3072,0.008697600166002909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,768,0.004051200052102407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,768,0.015191466609636942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,512,0.0035946667194366455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,3584,0.010188800096511842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,2560,0.008251733581225077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,1536,0.00618453323841095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,512,0.015210666259129844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,2048,0.006966400146484375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,1024,0.005710933109124502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,256,0.00323840007185936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,256,0.014636799693107605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,128,0.002995199958483378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,128,0.014697600404421488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,64,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,64,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2560,32,0.0028778667251269023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2560,32,0.01469013293584188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,65536,0.10368959903717041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,65536,0.07641173203786214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,16384,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,16384,0.030715733766555786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,12288,0.023640533288319908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,12288,0.026628265778223675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,10240,0.02060799996058146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,10240,0.024477867285410564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,768,0.005348266661167144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,8192,0.01698026657104492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,8192,0.022668800751368203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,512,0.005089066425959269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,7168,0.015478400389353433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,256,0.0049098665515581764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,7168,0.021844265858332317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2560,128,0.004699733356634776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,6144,0.013297067085901896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,6144,0.021401600042978922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,5120,0.012088533242543538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,5120,0.020246400435765585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,65536,0.08321386973063151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,4096,0.010554666320482891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,4096,0.019323732455571493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,16384,0.02395413319269816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,3584,0.009213866790135701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,3584,0.018346667289733887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,12288,0.01889066696166992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,3072,0.00853546659151713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,3072,0.017383466164271034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,10240,0.0166101336479187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,2560,0.007769600053628286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,2560,0.016662399967511496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,8192,0.014361600081125895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,2048,0.006695466736952464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,2048,0.016265599926312765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,7168,0.013123200337092081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,1536,0.004970666766166687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,1536,0.01611733337243398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,6144,0.011992533008257549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,1024,0.004285866518815359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,1024,0.015620266397794088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,5120,0.01088213324546814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,768,0.003870933254559835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,768,0.015371732910474143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,4096,0.009603200356165568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,512,0.0035797332723935447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,512,0.015440000096956888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,3072,0.008057599763075511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,256,0.0031583999594052637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,3584,0.009032533566157023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,256,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,128,0.002899199972550074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,128,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,2560,0.007285333176453908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,64,0.0028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,2048,0.006503466765085857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,64,0.014597333470980325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,2048,32,0.0028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,2048,32,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,65536,0.07908266385396322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,65536,0.0664352019627889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,1024,0.0054848000407218935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,16384,0.023911466201146446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,768,0.00513919989267985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,16384,0.027062400182088213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,12288,0.01917653282483419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,1536,0.006095999975999197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,12288,0.023845332860946655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,10240,0.016318933169047038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,10240,0.02232746680577596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,512,0.004929066697756449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,8192,0.013607466220855713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,256,0.00483840008576711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,8192,0.02135253349939982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,7168,0.012641066312789917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,7168,0.021828265984853108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,6144,0.011442133784294128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,2048,128,0.004620799918969473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,6144,0.02078826626141866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,5120,0.010615467031796774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,5120,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,65536,0.080404265721639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,4096,0.009455999732017517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,16384,0.023166932662328086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,4096,0.018382932742436728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,3584,0.008628267049789428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,12288,0.0186901330947876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,3584,0.01742080052693685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,3072,0.00790719985961914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,3072,0.016919465859731038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,10240,0.016637866695721946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,2560,0.007086933155854543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,2560,0.017108267545700072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,8192,0.01407360037167867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,7168,0.012870400150616964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,2048,0.005825066566467285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,2048,0.016391467054684958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,1536,0.0048213332891464235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,6144,0.011921067039171855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,1536,0.01604373355706533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,1024,0.0041184000670909885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,1024,0.015491200486818948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,5120,0.010674132903416952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,768,0.003841066608826319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,768,0.015230933825174967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,4096,0.00899733304977417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,3072,0.007572266459465027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,512,0.003499733408292135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,512,0.015352533260981242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,256,0.003201066702604294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,3584,0.008453333377838134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,256,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,128,0.0029258665939172106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,128,0.01456000010172526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,2560,0.007201066613197327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,64,0.0027445333699385325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,64,0.01458346645037333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1536,32,0.00278613343834877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1536,32,0.014562132954597472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,65536,0.05397440195083618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,65536,0.053260799249013266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,2048,0.006528000036875407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,1024,0.00526506652434667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,16384,0.017108267545700072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,16384,0.023627734184265135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,768,0.005102933446566264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,12288,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,12288,0.022312533855438233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,1536,0.0060149331887563075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,10240,0.012194133798281352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,10240,0.021687465906143188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,8192,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,8192,0.020374399423599244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,512,0.00481279989083608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,256,0.004706133405367533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1536,128,0.004632533093293508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,7168,0.009896533687909444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,7168,0.019323732455571493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,6144,0.009986133376757304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,6144,0.018380800882975258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,5120,0.00920853316783905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,5120,0.018210132916768394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,16384,0.022695465882619222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,65536,0.07939199606577554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,4096,0.007919999957084655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,4096,0.017762132485707603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,3584,0.007549866537253062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,12288,0.018531199296315512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,3584,0.01771093408266703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,3072,0.0067775999506314594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,10240,0.016334933042526246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,3072,0.017015467087427773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,8192,0.01402346690495809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,2560,0.006287999947865804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,2560,0.01644480029741923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,7168,0.012422399719556172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,2048,0.005482666691144307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,2048,0.016759467124938966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,6144,0.01123306651910146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,1536,0.004901333153247834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,1536,0.015723733107248943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,5120,0.009891200065612792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,1024,0.004075733323891958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,1024,0.01539520025253296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,4096,0.008610133330027263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,768,0.0038090666135152185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,3072,0.007547733187675476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,768,0.015340800086657206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,512,0.0034122665723164878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,3584,0.008308266599973042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,512,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,256,0.0030591999491055804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,256,0.01474346617857615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,128,0.002976000060637792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,128,0.014763733744621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,2048,0.006417066852251689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,64,0.0028789333999156954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,64,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,2560,0.00716480016708374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,1024,32,0.002930133293072383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,1024,32,0.014652799566586813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,1536,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,1024,0.005342933535575867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,65536,0.042940799395243326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,768,0.005053866902987162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,512,0.004766933123270671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,65536,0.048334932327270506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,12288,0.011497599879900615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,256,0.004728533327579498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,1024,128,0.004502399762471517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,16384,0.013765333096186319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,16384,0.0225055992603302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,12288,0.020980266729990642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,10240,0.010921600461006164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,10240,0.020038400093714395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,8192,0.00981760025024414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,8192,0.018308266003926595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,7168,0.009286399682362874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,7168,0.01856000026067098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,6144,0.008472533027331034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,6144,0.01845759948094686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,5120,0.007952000200748443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,5120,0.018974934021631876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,65536,0.07929706573486328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,4096,0.006628266473611195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,16384,0.02278719941775004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,4096,0.017842133839925133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,12288,0.0184714674949646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,3584,0.0070602665344874065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,10240,0.016272000471750894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,8192,0.01353493332862854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,3584,0.017848533391952515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,3072,0.006500266492366791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,7168,0.012216533223787945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,6144,0.010802132884661357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,5120,0.009718400239944459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,3072,0.016812799374262492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,2560,0.006137600044409434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,2560,0.016747732957204185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,2048,0.005487999816735586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,2048,0.016267733772595723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,1536,0.004844800134499868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,1536,0.015887999534606935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,1024,0.004152533411979675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,4096,0.008634666601816814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,1024,0.016217600305875143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,3584,0.008195200065771738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,768,0.003819733361403147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,768,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,512,0.0034133332471052804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,3072,0.007548800110816956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,512,0.014867200454076131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,256,0.003067733347415924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,2560,0.007190399865309398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,2048,0.006419200201829274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,256,0.014634666840235391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,128,0.002890666574239731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,128,0.01453013320763906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,1024,0.005283200244108836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,1536,0.005970133344332377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,64,0.0027594665686289472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,768,32,0.00279573326309522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,65536,0.031489066282908124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,64,0.014578133821487427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,768,0.005036800106366476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,768,32,0.014337066809336343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,512,0.00477866679430008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,16384,0.020329600572586058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,65536,0.0409717321395874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,16384,0.010550399621327717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,12288,0.00977280040582021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,256,0.004628266890843709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,12288,0.019475199778874717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,10240,0.008771199981371562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,10240,0.019237333536148073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,8192,0.007993599772453308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,8192,0.01843520005544027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,768,128,0.004466133316357931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,7168,0.007241599758466085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,7168,0.01875200072924296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,6144,0.006587733328342438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,6144,0.01806186636288961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,65536,0.07866026560465494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,5120,0.006856533388296763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,5120,0.018711467583974205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,4096,0.006206933160622915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,4096,0.017845332622528076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,16384,0.02267520030339559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,3584,0.006614399949709575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,3584,0.017293866475423178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,12288,0.01783039967219035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,3072,0.00629013329744339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,3072,0.01702079971631368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,10240,0.015548800428708395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,2560,0.006016000111897787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,2560,0.01654293338457743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,8192,0.013336533308029174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,2048,0.005426133175690969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,2048,0.016530133287111917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,7168,0.011823999881744384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,1536,0.004715733230113983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,1536,0.015851733088493348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,6144,0.010781866312026978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,1024,0.004070399949947992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,1024,0.015640532970428465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,5120,0.009648000200589497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,768,0.0037237333754698435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,768,0.01513706644376119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,4096,0.008654933174451191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,512,0.0033973333736260734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,512,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,3584,0.008267733454704284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,3072,0.007563733557860057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,256,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,256,0.01464959979057312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,128,0.002898133297761281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,128,0.0144896000623703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,2048,0.006363733112812043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,64,0.0026528000831604003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,2560,0.007039999961853028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,64,0.01444906691710154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,512,32,0.002701866626739502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,512,32,0.014434132973353067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,65536,0.019151999553044637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,65536,0.03130666613578796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,16384,0.008236800134181977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,16384,0.019731199741363524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,1536,0.005959466596444448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,1024,0.005276800195376078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,12288,0.006839466591676076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,12288,0.01873813271522522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,10240,0.006625066697597504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,10240,0.019038933515548705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,512,0.004822400212287903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,768,0.005066666503747304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,8192,0.006311466793219249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,8192,0.018168532848358156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,7168,0.006295466423034668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,7168,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,256,0.004543999830881754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,512,128,0.004534400006135305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,6144,0.006146133442719777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,6144,0.01825173298517863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,5120,0.006516266862551372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,5120,0.01886826753616333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,65536,0.07800319989522299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,16384,0.021824000279108684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,4096,0.0060693333546320595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,12288,0.017488000790278117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,4096,0.017722666263580322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,10240,0.015317333738009134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,3584,0.0065738668044408154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,3584,0.01716266671816508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,8192,0.013205333550771078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,3072,0.006237866481145223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,7168,0.01190613309542338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,3072,0.016950400670369466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,2560,0.005998933315277099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,6144,0.010699733098347982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,2560,0.0164000004529953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,5120,0.009699199597040813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,2048,0.005347200234731038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,1536,0.004681600133577982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,2048,0.016174933314323424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,4096,0.008633599678675333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,1536,0.01576746702194214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,1024,0.004007466634114583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,1024,0.015513599912325541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,768,0.003706666578849157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,768,0.015220266580581666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,512,0.0033183999359607695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,3072,0.007551999886830647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,512,0.014897066354751586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,256,0.0029663999875386557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,256,0.014615466197331747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,3584,0.008258133133252462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,2560,0.007169066866238911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,128,0.0028789333999156954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,2048,0.0064522668719291685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,128,0.014410666624704995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,64,0.002733866622050603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,64,0.014416000247001648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,256,32,0.00264533335963885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,256,32,0.014273066322008768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,65536,0.012341333429018657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,65536,0.0281333327293396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,16384,0.006223999957243601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,16384,0.019528534015019736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,512,0.004773333172003428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,1024,0.005298133194446564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,12288,0.00613013356924057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,768,0.004956800242265066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,1536,0.005981866518656413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,256,0.004479999840259552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,12288,0.018592000007629395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,10240,0.006305066744486491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,10240,0.01899519960085551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,8192,0.006321066617965698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,8192,0.018345600366592406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,7168,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,7168,0.01869973341623942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,6144,0.005930666625499725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,6144,0.017927465836207072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,256,128,0.004386133452256521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,5120,0.00633493314186732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,5120,0.018437333901723228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,65536,0.0773525317509969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,4096,0.005930666625499725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,16384,0.021869866053263347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,4096,0.017511467138926186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,3584,0.006357333560784657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,12288,0.017534933487574258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,3584,0.017146666844685875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,3072,0.0060138667623202005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,3072,0.016826667388280234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,10240,0.015312000115712484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,2560,0.00602346658706665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,2560,0.016328533490498863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,8192,0.013271466890970866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,2048,0.0053375999132792154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,2048,0.016422399878501893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,7168,0.011795199910799662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,1536,0.0046741331617037455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,1536,0.015794133146603904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,6144,0.010681600371996561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,1024,0.003957333415746689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,1024,0.015403733650843302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,5120,0.009803733229637146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,768,0.003640533238649368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,768,0.014924800395965577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,4096,0.008590933680534363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,512,0.003337600082159042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,512,0.014828800161679586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,3072,0.007447466750939687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,256,0.003031466652949651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,256,0.014510933558146158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,3584,0.008268799881140392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,2048,0.0064746667941411335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,128,0.0027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,128,0.014435199896494546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,64,0.002765866617361705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,64,0.014490666985511779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,2560,0.007055999835332234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,128,32,0.00276799996693929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,65536,0.00928000013033549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,128,32,0.01448853313922882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,65536,0.026441599925359088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,16384,0.006129066646099091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,12288,0.006052266558011373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,16384,0.019402666886647543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,1024,0.0052490666508674625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,12288,0.018461867173512777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,10240,0.006251733501752217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,10240,0.018845866123835243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,8192,0.006101333101590474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,1536,0.005926399926344554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,8192,0.018769067525863648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,768,0.005046399931112925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,7168,0.0059562668204307554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,7168,0.018685867389043175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,6144,0.0059562668204307554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,6144,0.018694400787353516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,5120,0.006171733140945435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,5120,0.01824000080426534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,4096,0.005804799993832906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,256,0.004690133531888326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,512,0.0047775998711586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,64,128,128,0.004509866734345754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,4096,0.01769066651662191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,3584,0.006295466423034668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,3584,0.017156267166137697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,3072,0.005892266829808554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,3072,0.016696532567342125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,2560,0.005972266693909963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,2560,0.01664426624774933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,2048,0.005346133311589559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,2048,0.0159850666920344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,1536,0.004686933259169261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,1536,0.015715199708938598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,1024,0.004009599983692169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,1024,0.015635200341542563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,768,0.003565866748491923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,768,0.015067733327547708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,512,0.0033749334514141084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,128,0.014169599612553915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,512,0.014989866813023885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,256,0.002916266769170761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,32,0.0026911998788515727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,256,0.01443839967250824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,128,0.0028277332584063213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,64,64,0.002690133452415466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,64,0.014386133352915446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,64,32,0.014597333470980325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,65536,0.008266666531562805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,65536,0.026921600103378296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,16384,0.006129066646099091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,16384,0.019195733467737834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,12288,0.005970133344332377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,12288,0.01848533352216085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,10240,0.006263466676076253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,10240,0.01923840045928955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,8192,0.006078933179378509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,8192,0.018284799655278523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,4096,0.005706666906674703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,7168,0.005871999760468801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,7168,0.01821333368619283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,6144,0.005752533177534739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,5120,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,6144,0.017946666479110716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,5120,0.01871466636657715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,4096,0.017516799767812095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,3584,0.006077866752942403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,3584,0.017156267166137697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,3072,0.005718400080998739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,3072,0.017154133319854735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,2560,0.006018133461475372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,2560,0.01614293356736501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,2048,0.005401599903901418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,2048,0.016130133469899496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,1536,0.00462719996770223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,1536,0.01562666694323222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,1024,0.0039594667653242745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,1024,0.015629866719245912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,256,0.014597333470980325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,768,0.0036938667297363283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,64,0.0026591998835404714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,768,0.015936000148455302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,512,0.003340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,512,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,256,0.002916266769170761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,128,0.002811733384927114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,128,0.014354133605957031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,64,0.01437440017859141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,64,32,32,0.002621866762638092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,64,32,32,0.014155733585357665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,16384,0.3965354601542155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,12288,0.30275627772013347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,16384,0.366595204671224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,16384,0.7576970418294271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,12288,0.569873046875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,10240,0.25494292577107747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,10240,0.47806933720906575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,8192,0.38505706787109373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,8192,0.20923840204874672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,12288,0.2775712013244629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,7168,0.3339189211527506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,7168,0.1844586690266927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,10240,0.23322240511576334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,7168,0.1658453305562337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,6144,0.2891050656636556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,6144,0.1612234592437744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,8192,0.2118325392405192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,6144,0.14416534105936687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,5120,0.24581546783447267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,5120,0.13725226720174152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,4096,0.1948736031850179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,4096,0.12412160237630207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,3584,0.17236480712890626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,3584,0.102347731590271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,3584,0.09836373329162598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,3072,0.1479189395904541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,3072,0.0908191998799642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,2560,0.12508266766866047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,2560,0.07828266620635986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,2048,0.10094400246938069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,1536,0.07739946842193604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,2048,0.06627306540807089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,2048,0.05698879957199097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,5120,0.12583359877268474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,1536,0.054819198449452725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,1024,0.053597867488861084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,1024,0.04255253473917643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,768,0.04250239928563436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,768,0.03664106527964274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,512,0.030213334163029987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,512,0.02942613363265991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,4096,0.09946560064951579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,256,0.019436800479888917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,256,0.02210879921913147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,3072,0.07709333101908365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,128,0.01092693308989207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,2560,0.0655402660369873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,128,0.01939199964205424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,64,0.007416533430417378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,64,0.01736746629079183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,65536,32,0.005820799867312113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,65536,32,0.017709867159525553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,1536,0.042768001556396484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,1024,0.03504000107447307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,768,0.02587946653366089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,512,0.020104533433914183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,65536,0.38892478942871095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,65536,0.7499807993570964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,16384,0.18731625874837238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,16384,0.11034666697184245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,12288,0.16485759417215984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,12288,0.08757546742757162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,10240,0.11782613595326741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,10240,0.06425919930140177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,10240,0.07507519721984864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,256,0.01409066617488861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,8192,0.09560426870981852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,8192,0.06407999992370605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,7168,0.08394773006439209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,65536,0.36576318740844727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,65536,128,0.01229759951432546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,7168,0.05781973203023275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,16384,0.10475733280181884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,12288,0.07512959639231363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,6144,0.07320319811503093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,6144,0.05770026842753092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,5120,0.06155733267466227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,8192,0.05264319976170858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,5120,0.04610346555709839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,7168,0.04692266782124837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,4096,0.05023893515268961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,4096,0.039879465103149415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,3584,0.04455360174179077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,6144,0.041427199045817056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,3584,0.03713279962539673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,3072,0.03866453170776367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,3072,0.034168533484141034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,2560,0.033105067412058514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,2560,0.030921600262324017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,5120,0.03559360106786092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,2048,0.02712000012397766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,2048,0.027436800797780353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,4096,0.02998720010121663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,1536,0.021602133909861244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,1536,0.023744000991185506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,1024,0.017322667439778647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,1024,0.02093440095583598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,768,0.011825066804885865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,768,0.019619200627009073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,3584,0.030487465858459472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,512,0.008906666437784832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,512,0.018186666568120322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,256,0.00619946668545405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,256,0.01573013365268707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,3072,0.024280534187952677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,256,0.006318933268388112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,128,0.0037834666669368743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,128,0.015427199999491372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,64,0.003643733263015747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,64,0.015492266416549683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,16384,32,0.003734400123357773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,16384,32,0.01572266618410746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,2560,0.021437867482503255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,2048,0.017953066031138103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,1536,0.014794666568438211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,65536,0.3076373418172201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,1024,0.011596799890200297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,65536,0.5791925430297852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,16384,0.1493717352549235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,16384,0.09384106794993083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,65536,0.28092374801635744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,12288,0.13394986788431804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,12288,0.07237866719563803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,10240,0.10329919656117756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,10240,0.06283946832021078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,8192,0.07738773028055826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,8192,0.05397013425827026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,7168,0.0675061305363973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,768,0.010058666268984478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,7168,0.04896639982859294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,512,0.00862506628036499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,6144,0.05864959955215454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,6144,0.04419840176900228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,5120,0.04920639991760254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,16384,128,0.005853866537412008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,5120,0.039614931742350264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,4096,0.04073280096054077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,4096,0.034612266222635905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,16384,0.07546133200327555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,3584,0.035843201478322345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,12288,0.058424532413482666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,10240,0.04952426751454671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,3584,0.03251306613286336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,3072,0.031113600730895995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,3072,0.029781333605448407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,8192,0.04160853226979573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,2560,0.02658240000406901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,2560,0.02729066610336304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,7168,0.03685119946797689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,2048,0.022171733776728313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,6144,0.032681600252787275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,2048,0.02447893420855204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,1536,0.017395200332005818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,1536,0.022345600525538127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,5120,0.028308266401290895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,1024,0.012513066331545511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,1024,0.019989333550135293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,4096,0.023774933815002442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,768,0.010097066561381023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,768,0.018398932615915933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,768,0.008752000331878663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,512,0.00782719999551773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,512,0.015506133437156677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,3584,0.021597866217295328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,256,0.00420906643072764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,256,0.015331199765205384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,128,0.0037077332536379496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,3072,0.019237333536148073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,128,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,64,0.003320533285538355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,2560,0.01687893271446228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,64,0.015203199783960977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,2048,0.014139733711878457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,12288,32,0.003457066665093104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,12288,32,0.015236266454060874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,1536,0.011925333738327026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,65536,0.48161598841349285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,1024,0.009867733716964722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,65536,0.26809600194295247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,16384,0.1302890698115031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,16384,0.08463040192921957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,12288,0.08834239641825357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,12288,0.0614677349726359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,512,0.006307200094064077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,10240,0.07395413716634115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,10240,0.05315946737925211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,8192,0.060158932209014894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,256,0.00555626650651296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,12288,128,0.005245866874853769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,8192,0.046453332901000975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,7168,0.053632001082102455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,7168,0.04254293441772461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,6144,0.04726719856262207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,7168,0.03167999982833862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,6144,0.03909333149592082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,5120,0.03995413382848104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,5120,0.03492053349812825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,4096,0.03277013301849365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,4096,0.030637866258621214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,65536,0.23742186228434242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,16384,0.06475306749343872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,3584,0.03282133340835571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,3584,0.0290175994237264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,12288,0.05027413368225098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,3072,0.02568320035934448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,3072,0.027013333638509114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,2560,0.02262079914410909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,2560,0.024715733528137208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,10240,0.04246826569239299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,2048,0.018181333939234413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,2048,0.02258346676826477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,8192,0.035860268274943034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,1536,0.014220800002415976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,1536,0.02099306583404541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,6144,0.028140799204508467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,1024,0.010426666339238484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,5120,0.02437653342882792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,4096,0.02065066695213318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,1024,0.01890666683514913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,768,0.00874773363272349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,768,0.01731733282407125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,512,0.006983466446399689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,3584,0.01873706579208374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,512,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,3072,0.016578132907549538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,256,0.0038090666135152185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,256,0.015473066767056783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,128,0.003505066782236099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,128,0.015156267086664834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,64,0.0032437334458033243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,64,0.015098667144775391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,10240,32,0.0032853332658608755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,10240,32,0.015154133240381876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,2048,0.012564266721407572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,65536,0.38156372706095376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,65536,0.21234560012817383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,1536,0.010700800021489461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,16384,0.09364799658457437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,16384,0.0639626661936442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,1024,0.008904533584912618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,768,0.00737066666285197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,12288,0.07176000277201335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,12288,0.05199679931004843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,12288,0.04151360193888347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,512,0.005838933090368906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,10240,0.06009386777877808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,10240,0.0460319995880127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,256,0.005264000097910563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,8192,0.04910399913787842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,8192,0.04003413518269856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,10240,128,0.004995200037956238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,7168,0.04336746533711751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,7168,0.03674026727676392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,65536,0.19226986567179363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,6144,0.038261334101359054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,16384,0.05315093199412028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,6144,0.03420586585998535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,5120,0.03232000072797139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,5120,0.030739200115203858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,4096,0.026627200841903686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,4096,0.027246934175491334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,3584,0.023706666628519692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,3584,0.025363200902938844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,3072,0.020895999670028687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,3072,0.024117332696914674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,10240,0.035412267843882246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,2560,0.017942400773366292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,8192,0.02985493342081706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,2560,0.02251519958178202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,2048,0.014693333705266317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,2048,0.021248000860214233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,7168,0.026753065983454387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,1536,0.011865599950154623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,1536,0.02002133329709371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,6144,0.02654079993565877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,1024,0.008970666925112407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,5120,0.020777599016825358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,1024,0.017977599302927652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,4096,0.01770346760749817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,768,0.00746666689713796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,3584,0.016242133577664693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,768,0.01578986644744873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,512,0.005836800237496694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,512,0.015481600165367126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,256,0.003818666686614355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,3072,0.014206932981808982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,256,0.015107199549674988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,128,0.003446399917205175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,2560,0.01297706663608551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,128,0.015092266599337259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,64,0.0032511999209721885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,2048,0.01123413344224294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,64,0.015135999520619711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,8192,32,0.0032992000381151833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,8192,32,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,1536,0.009946667154630025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,65536,0.20211092631022134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,65536,0.338696543375651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,16384,0.08982079823811849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,16384,0.06486826737721761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,12288,0.06346986691157022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,1024,0.007706666489442189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,768,0.006816000243028005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,12288,0.048255999883015946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,10240,0.05354346831639608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,10240,0.04269653161366781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,512,0.005797333518664042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,8192,0.043538133303324386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,256,0.0054954667886098225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,8192,0.03750720024108887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,7168,0.03845973412195842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,7168,0.03481493393580119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,8192,128,0.005310933291912079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,6144,0.034125868479410806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,6144,0.03193813363711039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,5120,0.02906773289044698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,5120,0.02874666651089986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,65536,0.17502506573994953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,4096,0.027007999022801717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,16384,0.049184000492095946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,4096,0.025659734010696413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,3584,0.021555199225743612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,12288,0.038040534655253096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,3584,0.024360533555348715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,3072,0.018969599405924478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,10240,0.03276906609535217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,3072,0.023095466693242393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,2560,0.016210132837295534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,8192,0.027353600660959883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,2560,0.02173653244972229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,2048,0.013304533561070761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,2048,0.020207999149958293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,7168,0.024689066410064697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,1536,0.010826667149861652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,1536,0.019385600090026857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,6144,0.02174826661745707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,1024,0.008255999783674877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,1024,0.016075733304023742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,5120,0.019016534090042114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,4096,0.016247466206550598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,768,0.007051733136177063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,3584,0.014408533771832785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,768,0.015190399686495461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,768,0.006262399752934774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,3072,0.013059199849764506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,512,0.0043605332573254905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,512,0.015242666999499003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,256,0.0035743998984495797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,2560,0.012035199999809265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,256,0.015102932850519816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,256,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,128,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,128,0.014919466773668923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,2048,0.010566400488217671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,64,0.0031317333380381264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,64,0.015009066462516785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,7168,32,0.003197866678237915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,7168,32,0.015025066335995993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,1536,0.008994133273760477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,16384,0.0794485330581665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,65536,0.28748906453450523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,65536,0.15485119819641113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,65536,0.17159679730733235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,16384,0.05493333339691162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,12288,0.06213759978612264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,12288,0.045021867752075194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,1024,0.00664213349421819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,10240,0.05270826816558838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,10240,0.03990826606750488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,8192,0.04031466643015544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,8192,0.03549866676330567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,7168,0.0352565328280131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,7168,0.032656000057856245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,6144,0.03097493251164754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,6144,0.030082132418950396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,512,0.005482666691144307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,5120,0.026281599203745527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,5120,0.028109866380691528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,7168,128,0.004993066688378652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,4096,0.022233599424362184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,4096,0.024948267141977946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,16384,0.04947520097096761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,12288,0.034322134653727215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,3584,0.01991573373476664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,10240,0.029373866319656373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,3584,0.02390399972597758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,3072,0.017409066359202065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,3072,0.022574933369954427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,8192,0.02463360031445821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,2560,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,2560,0.02139093279838562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,2048,0.01271573305130005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,7168,0.022061866521835328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,2048,0.02018346587816874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,1536,0.010294399658838908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,6144,0.0195850670337677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,1536,0.01879040002822876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,1024,0.007781333227952321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,1024,0.015625600020090738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,5120,0.01722559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,768,0.006593066453933716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,768,0.015642666816711427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,4096,0.014486400286356607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,512,0.004006399959325791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,512,0.01518186628818512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,3584,0.013172266880671182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,3072,0.012065066893895467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,256,0.0034858666360378264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,256,0.005093333125114441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,256,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,128,0.0032981333633263906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,2560,0.011081600189208984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,128,0.014816000064214071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,64,0.0031338666876157125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,2048,0.009763200084368389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,64,0.014775466918945313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,6144,32,0.0031317333380381264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,1536,0.008414933085441589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,6144,32,0.014985600113868713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,16384,0.06582293510437012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,65536,0.15038827260335286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,65536,0.2522677262624105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,1024,0.006492800017197926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,16384,0.049686400095621745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,768,0.006165333092212677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,12288,0.058107733726501465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,12288,0.03949546813964844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,512,0.005413333574930826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,10240,0.04386986494064331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,10240,0.03528000116348266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,10240,0.026358399788538617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,8192,0.035740800698598224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,8192,0.03146880070368449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,7168,0.02863573431968689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,7168,0.029399466514587403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,7168,0.020125865936279297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,6144,0.025613866249720257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,6144,0.026837333043416338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,6144,128,0.004741333425045013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,65536,0.13948480288187665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,5120,0.02183039983113607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,5120,0.024599466721216837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,16384,0.03906346559524536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,4096,0.018175999323527016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,4096,0.022692267100016275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,12288,0.03066986600557963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,4096,0.01323306659857432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,3584,0.021898667017618813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,3584,0.01623679995536804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,3072,0.014364799857139588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,3072,0.02085439960161845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,2560,0.012346667051315308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,2560,0.019950934251149497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,2048,0.01051200032234192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,2048,0.019108267625172932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,2048,0.009199999769528707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,8192,0.022075732549031578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,1536,0.008810666203498841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,1536,0.017529600858688356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,1024,0.006941866874694824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,1024,0.015889066457748412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,6144,0.01761173407236735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,768,0.005286400020122528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,768,0.015468800067901611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,5120,0.017025067408879598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,512,0.003818666686614355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,512,0.015719466408093772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,256,0.0033919999996821085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,256,0.015051733454068503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,3584,0.012245333194732666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,128,0.0031946666538715364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,3072,0.011174399654070537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,128,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,2560,0.010488532980283101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,64,0.0030400000512599947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,64,0.014814933141072592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,5120,32,0.003033600002527237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,5120,32,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,1536,0.0075328002373377485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,65536,0.1908138593037923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,65536,0.12060480117797852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,1024,0.006318933268388112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,16384,0.05518506765365601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,16384,0.04279359976450602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,12288,0.0410805344581604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,12288,0.03499626715977987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,10240,0.03478506803512573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,10240,0.03173226714134216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,768,0.006074666480223338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,8192,0.028820266326268513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,8192,0.02839786609013875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,512,0.005274666845798493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,256,0.004907733201980591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,7168,0.026100265979766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,7168,0.025997867186864216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,6144,0.0213919997215271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,6144,0.024641066789627075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,5120,128,0.004905599852403005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,5120,0.018313600619633993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,5120,0.023121066888173423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,65536,0.11660799980163575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,16384,0.032382933298746745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,4096,0.015174399813016257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,4096,0.021625600258509316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,12288,0.02571733395258586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,3584,0.013359999656677246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,3584,0.02073813279469808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,10240,0.022004266579945884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,3072,0.01211840013662974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,3072,0.020082134008407592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,2560,0.010443733135859171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,2560,0.019435733556747437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,8192,0.0184063990910848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,2048,0.009036800265312195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,2048,0.018221867084503175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,7168,0.016681599617004394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,1536,0.00767146646976471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,1536,0.016407466928164163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,6144,0.015110400319099427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,1024,0.0060586666067441305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,1024,0.015460266669591268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,5120,0.01330880026022593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,768,0.0039594667653242745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,768,0.015572266777356467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,4096,0.01186346709728241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,512,0.0036149332920710243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,3584,0.011162666479746501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,512,0.014987732966740927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,256,0.003219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,3072,0.010129066308339436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,256,0.014762666821479798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,128,0.002997333308060964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,128,0.014556800325711569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,2560,0.009454933802286784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,64,0.00288426677385966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,64,0.01480959951877594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,4096,32,0.0030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,2048,0.008266666531562805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,4096,32,0.015092266599337259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,65536,0.17671786944071452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,65536,0.11402239799499511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,1536,0.0070720002055168155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,1024,0.00595306654771169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,16384,0.045900801817576095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,16384,0.039654401938120525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,12288,0.037264001369476316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,12288,0.03300693432490031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,768,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,10240,0.0313920001188914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,10240,0.029765333731969195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,512,0.005310933291912079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,8192,0.026115200916926068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,8192,0.02614293297131856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,256,0.004953599969546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,7168,0.023740800221761067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,4096,128,0.004802133142948151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,7168,0.02500586708386739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,6144,0.019291732708613077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,6144,0.023433599869410196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,5120,0.016353066762288412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,5120,0.022371200720469157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,65536,0.10682026545206706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,4096,0.013390933473904928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,16384,0.030186667044957476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,4096,0.020702934265136717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,3584,0.012243200341860454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,12288,0.02380266586939494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,3584,0.020121600230534872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,3072,0.01097813347975413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,3072,0.019681066274642944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,10240,0.02063573400179545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,2560,0.009662933150927226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,2560,0.01885546644528707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,8192,0.017375999689102174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,2048,0.008409600456555684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,2048,0.0173962672551473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,7168,0.015581867098808289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,1536,0.0073290665944417315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,1536,0.01585706671079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,6144,0.014094932874043783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,1024,0.005456000069777171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,1024,0.015786666671435037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,1024,0.005834666887919108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,768,0.0040277334551016486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,5120,0.012498133381207784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,768,0.015412267049153647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,512,0.0036533333361148832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,512,0.015019733707110086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,4096,0.011092266440391541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,256,0.003268266717592875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,256,0.014788267016410828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,3584,0.010612266262372334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,256,0.004997333387533823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,128,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,128,0.014713600277900696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,64,0.002921599894762039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,3072,0.009678933024406432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,64,0.014513066411018372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3584,32,0.0029120000700155893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3584,32,0.01470186710357666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,2560,0.009292800227801006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,65536,0.1504576047261556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,65536,0.10001493295033773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,16384,0.04519999821980794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,2048,0.007500799993673961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,16384,0.03800106843312581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,12288,0.03323093255360921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,1536,0.006694399813810985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,12288,0.03216853340466817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,10240,0.028305067618687944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,10240,0.029203200340270997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,8192,0.024283732970555624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,8192,0.026205867528915405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,8192,0.017178666591644288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,7168,0.021782400210698445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,768,0.005749333401521047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,7168,0.02349546750386556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,512,0.005187200009822845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,6144,0.019425066312154134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,6144,0.023269333442052207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,5120,0.016747732957204185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,5120,0.021718400716781616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3584,128,0.0047978664437929785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,4096,0.01216426690419515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,4096,0.020522665977478028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,65536,0.10869546731313069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,3584,0.011178666353225708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,16384,0.03054080009460449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,3584,0.019887999693552653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,3072,0.010166399677594503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,12288,0.02377706567446391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,3072,0.019074134031931558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,10240,0.02063039938608805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,2560,0.009041066964467366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,2560,0.017937066157658894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,2048,0.007854933540026348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,2048,0.016379732886950174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,7168,0.01562773287296295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,6144,0.014042666554450989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,1536,0.0067093332608540845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,1536,0.016292267044385276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,5120,0.012734933694203695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,1024,0.004626133541266123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,1024,0.01581439971923828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,4096,0.011113599936167399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,768,0.004051200052102407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,768,0.015320533514022827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,3584,0.010525866349538168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,512,0.0036320000886917113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,512,0.014990933736165366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,3072,0.0095360000928243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,256,0.003278933217128118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,256,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,2560,0.008564266562461852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,128,0.0030591999491055804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,128,0.014654933412869772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,2048,0.0072629332542419435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,64,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,64,0.014556800325711569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,3072,32,0.0028959999481836954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,3072,32,0.014516266187032065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,65536,0.12845653692881268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,1536,0.006410666803518932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,16384,0.03487253189086914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,65536,0.08697493076324463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,16384,0.03334506750106812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,12288,0.027778132756551104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,12288,0.02895146608352661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,1024,0.006021333237489065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,10240,0.024258132775624594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,10240,0.02640213370323181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,768,0.005688533186912537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,8192,0.020180267095565797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,8192,0.02463679909706116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,512,0.0051360001166661584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,7168,0.018147200345993042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,7168,0.023099732398986817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,256,0.004948266843954722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,6144,0.01609280010064443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,6144,0.02164693276087443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,3072,128,0.004748799900213877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,5120,0.014012799660364787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,5120,0.02104960083961487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,65536,0.10254720052083333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,4096,0.012109866738319397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,16384,0.028540800015131634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,4096,0.019294933478037516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,3584,0.011262933413187664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,12288,0.022348799308141074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,3584,0.01907520095507304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,3072,0.009128533800443013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,3072,0.017935999234517417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,10240,0.019267199436823527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,2560,0.008361599842707316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,2560,0.017968000968297322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,8192,0.016390400131543477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,2048,0.007321600119272869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,2048,0.016334933042526246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,7168,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,1536,0.006011733412742614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,1536,0.016039466857910155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,6144,0.013464533289273582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,1024,0.004343466460704803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,1024,0.015544533729553223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,5120,0.01209920048713684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,768,0.004020266731580098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,768,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,768,0.005636266867319743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,512,0.003601066768169403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,512,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,4096,0.01083626647790273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,256,0.0032298666735490165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,256,0.014661332964897156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,256,0.00487253318230311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,128,0.0029098667204380036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,3584,0.010002133250236512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,128,0.014662399888038635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,128,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,64,0.0028042666614055633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,64,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2560,32,0.0028629332780838014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,3072,0.008984532952308655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2560,32,0.014528000354766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,2560,0.0079925333460172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,65536,0.10151999791463215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,2048,0.006939733525117238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,65536,0.07242560386657715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,1536,0.006213333209355672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,16384,0.028862933317820232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,16384,0.02982826630274455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,1024,0.005866666634877523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,12288,0.023306665817896526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,12288,0.02571093241373698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,10240,0.02028053402900696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,10240,0.024076799551645912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2560,512,0.005085866649945577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,8192,0.01665279964605967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,8192,0.02254400054613749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,7168,0.014988799889882406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,7168,0.021600000063578286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,7168,0.013617066542307535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,6144,0.013500799735387167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,6144,0.02119999925295512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,5120,0.012030933300654094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,5120,0.020122667153676353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,65536,0.09105599721272786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,4096,0.010615467031796774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,4096,0.019351466496785482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,16384,0.026055467128753663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,3584,0.009971200426419576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,3584,0.018582399686177573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,12288,0.01993173360824585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,3072,0.008737066388130188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,3072,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,10240,0.016962132851282754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,8192,0.014573867122332254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,2560,0.007698133091131846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,2560,0.01660160024960836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,2048,0.006557866434256236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,2048,0.016335999965667723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,6144,0.012749866644541422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,1536,0.00517439991235733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,1536,0.015666133165359496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,1024,0.004110933343569437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,5120,0.011276800433794658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,1024,0.015531733632087708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,768,0.0037834666669368743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,768,0.015352533260981242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,4096,0.009826133648554485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,512,0.0034965333839257562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,512,0.0151637335618337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,3584,0.009578667084376017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,256,0.0031744000812371576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,256,0.01482133368651072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,3072,0.008288000027338665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,128,0.0029237332443396253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,128,0.014727466305096946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,2560,0.007469866673151653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,64,0.002739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,2048,0.006905599931875865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,64,0.014494933684666953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,2048,32,0.002872533351182938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,2048,32,0.014460800091425577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,65536,0.07895359992980958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,65536,0.06291733185450235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,1536,0.006337066491444905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,16384,0.023451733589172363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,1024,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,16384,0.026290132602055864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,12288,0.01881493330001831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,12288,0.02363733251889547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,12288,0.018247467279434205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,768,0.00555626650651296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,10240,0.01614400049050649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,10240,0.02209279934565226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,8192,0.013578666249910989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,8192,0.022320000330607097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,7168,0.012386133273442585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,7168,0.021310933430989585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,512,0.005074133475621542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,6144,0.011507200201352437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,6144,0.02070080041885376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,256,0.0048320000370343525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,5120,0.010500267148017883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,2048,128,0.004731733103593191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,5120,0.01948266625404358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,4096,0.009331199526786804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,4096,0.01907520095507304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,65536,0.08067413171132407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,3584,0.008736000458399455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,16384,0.02297066648801168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,3584,0.017190400759379068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,3072,0.008057599763075511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,10240,0.016081066926320393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,3072,0.016822399695714314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,8192,0.013869866728782654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,2560,0.007222400108973186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,7168,0.012742400169372559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,2560,0.01678826610247294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,2048,0.005820799867312113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,2048,0.016330666343371072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,1536,0.004917333523432413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,6144,0.011769599715868632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,1536,0.01601599951585134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,1024,0.004205866654713949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,1024,0.01583146651585897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,5120,0.01067626674969991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,768,0.003942399968703588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,768,0.015254400173823037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,4096,0.009337600072224934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,512,0.0035989334185918174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,512,0.014917332927385965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,3584,0.008422399560610454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,256,0.003172266731659571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,256,0.014696533481280008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,3072,0.007593599955240886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,128,0.0028757333755493166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,128,0.014451199769973755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,2560,0.007286400099595388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,2048,0.006537599861621857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,64,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,1536,0.006183466811974844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,64,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1536,32,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1536,32,0.014454399545987448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,65536,0.05295679966608683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,65536,0.04959466854731242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,1024,0.005407999952634176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,16384,0.018119466304779053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,16384,0.02325973312060038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,12288,0.013709867000579834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,768,0.005386666456858317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,12288,0.021307732661565146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,12288,0.018210132916768394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,10240,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,512,0.005010133484999338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,10240,0.021101866165796915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,256,0.004749866823355356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1536,128,0.0046015997727712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,8192,0.010761599739392598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,8192,0.01999893387158712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,8192,0.013646933436393737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,65536,0.07786986827850342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,7168,0.01009493370850881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,16384,0.02268480062484741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,7168,0.019523199399312338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,6144,0.00907306671142578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,10240,0.015819733341534935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,6144,0.018450133005777993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,6144,0.010897066195805867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,5120,0.009258666634559631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,5120,0.018385066588719686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,4096,0.008274133503437042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,4096,0.018322134017944337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,3584,0.007910400132338206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,3584,0.01733120083808899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,3072,0.006838400165239971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,3072,0.017013333241144814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,2560,0.006187733511130015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,2560,0.016722132762273155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,7168,0.012678399682044983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,2048,0.005478399991989136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,5120,0.009690666198730468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,2048,0.01630400021870931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,1536,0.004870399832725525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,4096,0.008685866991678875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,1536,0.0157642662525177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,1024,0.004173866907755534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,3584,0.008169599870840708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,1024,0.01565226713816325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,768,0.0038218667109807336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,768,0.015105066696802774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,3072,0.007528533538182576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,512,0.003433600068092346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,2560,0.007039999961853028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,512,0.014839466412862143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,256,0.0030933332939942675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,256,0.014727466305096946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,2048,0.006459733347098033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,128,0.0029056000212828318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,1536,0.006073600053787232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,128,0.014664533734321594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,64,0.002825599908828735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,1024,0.005342933535575867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,64,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,1024,32,0.0027903998891512555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,1024,32,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,768,0.005020800232887268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,65536,0.042005332310994466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,512,0.004939733445644379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,65536,0.043728001912434894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,16384,0.013821867108345032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,16384,0.021572266022364298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,16384,0.022665599981943764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,12288,0.011804800232251484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,12288,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,256,0.004729599754015604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,10240,0.01092906693617503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,10240,0.020492800076802573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,8192,0.01074666678905487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,8192,0.019525333245595296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,7168,0.010032000144322713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,1024,128,0.004540800054868063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,7168,0.01895893414815267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,6144,0.009390933314959209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,6144,0.01846826672554016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,5120,0.00846506655216217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,65536,0.07706027030944824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,5120,0.018579200903574625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,4096,0.007353599866231282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,4096,0.017617066701253258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,3584,0.006917333106199901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,3584,0.017607466379801432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,12288,0.01799573302268982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,3072,0.006348800162474315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,3072,0.0166485329469045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,10240,0.015726932883262636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,2560,0.006200533111890157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,2560,0.016860800981521606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,8192,0.013134933511416116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,7168,0.0118559996287028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,2560,0.007041066884994507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,2048,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,2048,0.016124799847602844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,2048,0.006393600006898243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,1536,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,1536,0.015692800283432007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,6144,0.010762666662534077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,5120,0.009709866841634114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,1024,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,4096,0.0086517333984375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,768,0.0037930667400360107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,1024,0.015203199783960977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,3584,0.008180266618728638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,3072,0.007526400188604991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,768,0.015118933717409768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,512,0.00347626656293869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,512,0.015096533298492431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,256,0.0031615999837716425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,256,0.014716800053914389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,256,0.004665599763393402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,128,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,128,0.014442666371663412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,128,0.004560000201066335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,64,0.0026880001028378804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,1536,0.005973333120346069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,64,0.014502400159835815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,768,32,0.0028309332827727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,1024,0.005275733272234599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,768,32,0.014504533012708029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,65536,0.030449066559473676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,16384,0.02138239940007528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,65536,0.03798186779022217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,16384,0.010905599594116211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,12288,0.009619200229644775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,12288,0.019371734062830607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,10240,0.00992746651172638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,10240,0.01904319922129313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,8192,0.008795733253161114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,768,0.00506986677646637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,8192,0.018288000424702962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,8192,0.01297706663608551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,7168,0.008113066852092742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,7168,0.018569600582122803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,768,512,0.004799999793370565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,6144,0.007320533196131389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,6144,0.018289067347844443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,5120,0.006543999910354615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,5120,0.01850773294766744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,4096,0.0062282666563987735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,4096,0.017442133029301962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,65536,0.07721066474914551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,3584,0.006546133259932201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,16384,0.02267626722653707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,3584,0.017296000321706136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,12288,0.017617066701253258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,3072,0.006262399752934774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,3072,0.01690453290939331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,3072,0.007562666634718577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,10240,0.014918399850527444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,2560,0.006129066646099091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,2560,0.016990933815638223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,2048,0.005418666700522105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,2048,0.01638826628526052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,1536,0.004802133142948151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,1536,0.016307199994723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,7168,0.011735467116038005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,1024,0.004053333401679992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,6144,0.010651733477910359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,1024,0.015737600127855935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,768,0.0036650667587916053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,768,0.015150933464368185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,5120,0.009687466422716777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,512,0.0033759998778502146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,512,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,4096,0.008591999610265095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,256,0.0030432000756263735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,256,0.014469333489735923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,3584,0.008117333551247915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,128,0.0028223998844623564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,128,0.01458026667435964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,2560,0.007066666583220164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,64,0.002705066651105881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,2048,0.006296533346176148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,64,0.01434879998366038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,512,32,0.002733866622050603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,512,32,0.014391466975212097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,65536,0.01895573337872823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,65536,0.02918506662050883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,1536,0.006004266440868378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,16384,0.009179733196894328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,16384,0.01941546599070231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,1024,0.005241600175698599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,12288,0.007632000247637431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,12288,0.018524799744288126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,10240,0.006986666719118755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,768,0.005011199911435445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,10240,0.018782933553059898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,8192,0.006411733229955037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,512,0.004748799900213877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,8192,0.01830186645189921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,256,0.004633600016434988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,7168,0.006205866734186808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,7168,0.01842666665712992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,512,128,0.004469333092371622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,6144,0.006050133208433787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,6144,0.018293333053588868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,65536,0.0768725315729777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,5120,0.006444799900054932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,16384,0.021702400843302407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,5120,0.018503467241923012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,12288,0.01725013256072998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,4096,0.006018133461475372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,4096,0.01801813244819641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,3584,0.006538666784763336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,10240,0.01502826710542043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,3584,0.01733760039011637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,3072,0.006221866607666016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,3072,0.016724266608556113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,8192,0.012769066294034324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,2560,0.005998933315277099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,2560,0.016371200482050575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,2048,0.005385600030422211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,2048,0.01637226641178131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,7168,0.011788800358772278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,1536,0.004729599754015604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,1536,0.016058666507403056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,6144,0.010645332932472228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,1024,0.0039989332358042395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,1024,0.01548693378766378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,5120,0.009618133306503296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,768,0.0036309334139029183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,4096,0.008693333466847737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,768,0.015491200486818948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,512,0.003323733309904734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,512,0.014910933375358582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,3584,0.008083199958006541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,512,0.004705066482226053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,256,0.002963199963172277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,256,0.014548266927401224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,3072,0.007529599964618683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,128,0.0028149334092934927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,128,0.014654933412869772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,64,0.0026911998788515727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,64,0.014579199751218162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,256,32,0.002696533252795537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,2560,0.007021866738796234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,65536,0.011623467008272808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,256,32,0.01458560029665629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,2048,0.006376533210277558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,65536,0.025788799921671553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,16384,0.006187733511130015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,16384,0.019078399737675986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,12288,0.006166400015354156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,1536,0.005895466605822245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,12288,0.018887466192245482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,10240,0.006386133531729381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,10240,0.019019732872645058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,1024,0.0052714665730794275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,8192,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,8192,0.018151466051737467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,768,0.005008000135421753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,7168,0.006178133189678192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,7168,0.019003732999165853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,256,0.004620799918969473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,6144,0.006006399790445963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,6144,0.01798186699549357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,256,128,0.0044725333650906885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,5120,0.006367999811967213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,5120,0.01844693422317505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,65536,0.07582826614379883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,16384,0.021682133277257286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,4096,0.006004266440868378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,4096,0.01746986707051595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,12288,0.017324799299240114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,3584,0.006420266628265381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,3584,0.016934400796890257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,3072,0.006109866499900818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,10240,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,3072,0.016665599743525186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,2560,0.0060149331887563075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,2560,0.016860800981521606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,8192,0.012804266810417176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,2048,0.005351466437180838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,2048,0.01621333360671997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,7168,0.011874133348464965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,1536,0.004690133531888326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,1536,0.016237866878509522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,6144,0.010680533448855082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,1024,0.00407679999868075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,1024,0.01511679987112681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,5120,0.009581866860389709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,768,0.003638399889071783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,768,0.015095466375350952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,4096,0.008546132842699687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,512,0.0034005333979924522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,3584,0.008227199812730153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,512,0.014818132917086283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,3072,0.0074538667996724445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,256,0.003102933367093404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,256,0.014699733257293702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,2560,0.007030400137106578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,128,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,128,0.014377599954605103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,2048,0.006337066491444905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,64,0.0026549334327379864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,64,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,128,32,0.0026549334327379864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,1536,0.005902933577696482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,128,32,0.014574933052062988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,65536,0.008957866827646892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,65536,0.024312533934911094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,16384,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,16384,0.01928640007972717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,12288,0.006040533383687338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,1024,0.005345066885153452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,12288,0.018477867046991982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,10240,0.006270933151245117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,10240,0.018531199296315512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,8192,0.006126933296521505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,512,0.0047989333669344585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,768,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,7168,0.01842026710510254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,8192,0.017918932437896728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,7168,0.006066133578618368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,6144,0.005868799984455109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,6144,0.018023467063903807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,5120,0.006287999947865804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,256,0.004822400212287903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,5120,0.018360533316930137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,48,128,128,0.00452693353096644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,4096,0.00600853314002355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,4096,0.01735360026359558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,3584,0.006340266764163971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,3584,0.01748266617457072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,3072,0.00598826656738917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,3072,0.016774400075276693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,2560,0.006006399790445963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,2560,0.016383999586105348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,768,0.003623466690381368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,2048,0.005368533233801523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,2048,0.01665279964605967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,1536,0.004570666452248891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,1536,0.01628159979979197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,1024,0.0040287998815377556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,1024,0.0153546671072642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,768,0.014942933122316995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,512,0.0032821332414944967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,512,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,256,0.002948266764481862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,256,0.014402133226394654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,128,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,128,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,64,0.0027434666951497394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,64,0.014254933595657349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,64,32,0.002656000107526779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,64,32,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,65536,0.009027199943860371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,65536,0.02416426738103231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,16384,0.006093866626421611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,16384,0.018924800554911296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,12288,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,12288,0.01844053268432617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,10240,0.006242133180300395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,10240,0.018759467204411826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,8192,0.006159999966621399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,8192,0.01804373264312744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,7168,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,7168,0.018091734250386557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,6144,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,6144,0.0178656001885732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,5120,0.00625600020090739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,5120,0.018480000893274943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,4096,0.00595413347085317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,4096,0.01731520096460978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,3584,0.006236800054709116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,3584,0.01694399913152059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,3072,0.006082133452097575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,1536,0.015710933009783427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,3072,0.0166293332974116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,2560,0.005955199897289276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,2560,0.016596266627311708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,2048,0.005335466563701629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,2048,0.015910399953524272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,1536,0.004548266530036926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,1024,0.004006399959325791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,1024,0.015230933825174967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,768,0.0035487999518712364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,768,0.01499626636505127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,512,0.0032757334411144257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,512,0.014749866724014283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,256,0.002915200094381968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,256,0.014408533771832785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,128,0.002825599908828735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,128,0.014272000392278036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,64,0.0025941332181294756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,64,0.014519466956456503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,48,32,32,0.0026176000634829206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,48,32,32,0.014252799749374389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,16384,0.7559701283772786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,16384,0.3940352121988932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,12288,0.3014368057250977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,12288,0.5684192021687825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,10240,0.4761525472005208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,10240,0.27305386861165365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,8192,0.2127690633138021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,8192,0.379969056447347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,7168,0.3349034627278646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,7168,0.18291734059651693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,6144,0.1594805399576823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,6144,0.2888554573059082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,16384,0.36694186528523765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,12288,0.2755104064941406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,5120,0.24086079597473145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,10240,0.23162453969319663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,5120,0.1361205259958903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,4096,0.11127359867095947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,4096,0.19491200447082518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,8192,0.1866421381632487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,3584,0.17081813812255858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,3584,0.10073386828104655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,7168,0.16449599266052245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,3072,0.14671360651652018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,3072,0.08913493156433105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,6144,0.14249599774678548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,2560,0.12292266686757405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,2560,0.07724266846974691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,2048,0.09918506940205893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,2048,0.06472746531168619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,1536,0.07692906856536866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,5120,0.12037119865417481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,1536,0.05377279917399088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,4096,0.11138453483581542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,1024,0.06041066646575928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,1024,0.04134506781895955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,768,0.041351465384165446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,768,0.03544319868087768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,512,0.029175466299057005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,512,0.028406399488449096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,512,0.019495467344919838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,256,0.01634880006313324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,3584,0.08740693728129069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,256,0.021591466665267945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,3072,0.0763434648513794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,128,0.009581866860389709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,128,0.019421867529551187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,64,0.007009066641330719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,64,0.01694399913152059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,65536,32,0.005286400020122528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,65536,32,0.017094399531682333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,2560,0.06475199858347574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,2048,0.0541375994682312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,1536,0.04179946581522624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,65536,0.389135996500651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,65536,0.7501269022623698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,16384,0.1861834685007731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,1024,0.03136746684710185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,16384,0.1263754685719808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,12288,0.14187520345052082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,16384,0.11286719640096028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,12288,0.0868725299835205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,10240,0.11784426371256511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,768,0.02490773399670919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,10240,0.07550506591796875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,8192,0.09546240170796713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,8192,0.06336106856664023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,7168,0.08400959968566894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,7168,0.0571722666422526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,6144,0.07344000339508057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,6144,0.05146453380584717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,256,0.01405333379904429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,65536,128,0.011783466736475626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,5120,0.06144533157348633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,5120,0.04565226634343465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,5120,0.03531519969304402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,4096,0.049960533777872726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,4096,0.03965973456700643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,3584,0.04428266684214274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,3584,0.03685333331425984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,65536,0.36317332585652673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,3072,0.04456959962844849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,3072,0.03352320194244385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,12288,0.0746997356414795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,10240,0.06340053478876749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,2560,0.03271146615346272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,2560,0.03067306677500407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,2048,0.026804266373316447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,8192,0.05222613414128622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,2048,0.027110399802525838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,2048,0.01763733426729838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,1536,0.021026132504145305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,7168,0.048798934618632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,1536,0.023753599325815836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,1024,0.01483626663684845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,1024,0.02177066604296366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,768,0.011770666639010111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,6144,0.04118506511052449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,768,0.01927893360455831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,768,0.010156800349553425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,512,0.008880000313123066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,512,0.017307732502619425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,4096,0.029487999280293782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,256,0.005972266693909963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,256,0.01532373329003652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,3584,0.02686506708463033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,128,0.0037429332733154297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,128,0.0150218665599823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,128,0.005830400188763936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,64,0.0033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,64,0.015084800124168397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,16384,32,0.0034783999125162757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,16384,32,0.015171200037002563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,3072,0.023785599072774253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,2560,0.02109439969062805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,65536,0.5709397633870442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,65536,0.30133867263793945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,1536,0.014814933141072592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,1024,0.011448533336321513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,16384,0.07500373522440593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,16384,0.1723584016164144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,16384,0.09278293450673422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,12288,0.11332800388336181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,12288,0.07235626379648843
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,10240,0.09552319844563803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,10240,0.06261759996414185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,8192,0.0779584010442098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,8192,0.05354346831639608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,512,0.008373333017031352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,7168,0.06827946503957114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,7168,0.048706134160359696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,6144,0.059027198950449625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,6144,0.04387093385060628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,16384,256,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,6144,0.03203199903170268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,5120,0.04900693496068319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,5120,0.03927786747614543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,4096,0.04050346612930298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,4096,0.03436906735102336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,3584,0.03574933211008708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,3584,0.03192853331565857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,65536,0.28004372914632164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,3072,0.03510826826095581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,3072,0.0298634668191274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,12288,0.058000000317891445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,10240,0.04899306694666545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,2560,0.0265120009581248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,2560,0.026807467142740887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,2048,0.0218122661113739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,8192,0.04075413147608439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,2048,0.024046933650970458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,7168,0.03632426659266154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,1536,0.017097600301106772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,1536,0.021810134251912437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,1024,0.012113066514333089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,5120,0.027940267324447633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,1024,0.019409066438674925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,1024,0.009677867094675701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,768,0.00993173321088155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,768,0.01825066606203715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,512,0.007573333382606506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,4096,0.02367786765098572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,512,0.015511467059453329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,256,0.004025600105524063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,256,0.015332266688346863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,3584,0.02132693330446879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,128,0.003475199888149897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,128,0.015099733074506124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,128,0.005020800232887268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,64,0.003257599969704946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,64,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,12288,32,0.0032885332902272543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,12288,32,0.014889599879582724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,65536,0.4874133427937825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,3072,0.01904426614443461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,2560,0.01679253379503886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,2048,0.014103466272354126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,65536,0.2599477291107178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,16384,0.12795519828796387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,1536,0.011710932850837708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,65536,0.24026026725769042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,16384,0.08016640345255534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,12288,0.09815573692321777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,12288,0.06132586797078451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,12288,0.056626133124033605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,768,0.008598400155703227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,10240,0.08286506334940592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,10240,0.05365546544392904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,512,0.0065290664633115125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,8192,0.06821333567301432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,8192,0.0457696000734965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,12288,256,0.005633066594600678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,7168,0.05436160167058309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,7168,0.04164586861928304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,6144,0.047764265537261964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,6144,0.037913600603739425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,6144,0.031215999523798627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,5120,0.03993920087814331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,5120,0.03396693468093872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,16384,0.07425386905670166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,10240,0.042481064796447754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,4096,0.032793599367141726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,4096,0.03033813238143921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,8192,0.03520853519439697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,3584,0.02911146680514018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,7168,0.03167253335316976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,3584,0.028495999177296956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,3584,0.018549333016077675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,3072,0.025552000602086383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,3072,0.02616853316624959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,2560,0.021734400590260824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,2560,0.024445867538452147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,5120,0.024235733350118003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,2048,0.017848533391952515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,4096,0.02053546706835429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,1536,0.02063573400179545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,2048,0.022394667069117226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,1536,0.014007467031478881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,1024,0.010220799843470256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,1024,0.01876373291015625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,3072,0.016618667046229045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,768,0.00848640004793803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,2560,0.014693333705266317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,768,0.016744534174601235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,768,0.007355733215808869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,512,0.006797866523265838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,512,0.0156960000594457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,256,0.015099733074506124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,2048,0.012594133615493774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,1536,0.010647466778755188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,256,0.0035978667438030243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,256,0.005421866476535797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,128,0.003335466732581457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,128,0.01493333379427592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,64,0.0031530665854612983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,1024,0.008891733487447102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,64,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,10240,32,0.003219199925661087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,10240,32,0.015059199929237366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,16384,0.09350612958272299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,65536,0.20860160191853844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,65536,0.3771199862162272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,65536,0.19651734034220378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,16384,0.06302613417307536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,12288,0.0716917355855306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,512,0.0061365331212679545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,10240,128,0.00488319993019104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,12288,0.054423467318216956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,10240,0.06209493478139242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,10240,0.04559359947840373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,16384,0.05332159996032715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,8192,0.04881493250528972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,8192,0.039640533924102786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,7168,0.042982399463653564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,7168,0.03622506856918335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,6144,0.03764373461405436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,6144,0.03328426678975423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,5120,0.03216639955838521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,5120,0.0302293340365092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,12288,0.046845865249633786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,4096,0.026503467559814455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,4096,0.027090134223302205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,3584,0.0235317329565684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,3584,0.025465599695841473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,3072,0.020687999327977498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,3072,0.02407146692276001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,10240,0.035837864875793456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,2560,0.017761067549387614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,2560,0.022170666853586832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,2560,0.013106133540471396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,2048,0.014486400286356607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,2048,0.020781866709391274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,8192,0.030406399567921953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,1536,0.011571199695269267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,1536,0.019272534052530925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,7168,0.02672533392906189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,1024,0.008659199873606364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,1024,0.01767359972000122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,6144,0.023640533288319908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,1024,0.00788800021012624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,768,0.007340799768765767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,768,0.015371732910474143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,512,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,5120,0.020617600282033285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,512,0.015786666671435037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,256,0.0036159999668598174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,256,0.015154133240381876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,4096,0.017578667402267455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,3584,0.016218666235605875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,128,0.003319466610749563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,3072,0.014521599809328715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,128,0.014756266276041666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,64,0.003138133386770884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,64,0.014949333667755128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,8192,32,0.0032469332218170166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,8192,32,0.015060266852378846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,2048,0.011296000083287556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,65536,0.3349013328552246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,65536,0.19555946985880535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,1536,0.009805867075920105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,16384,0.10065706570943196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,16384,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,65536,0.1892074743906657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,768,0.006809600194295247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,512,0.00626453310251236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,256,0.005560533205668131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,12288,0.06499840021133423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,8192,128,0.005141333242257436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,12288,0.04846400022506714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,10240,0.05346773465474447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,10240,0.042225066820780435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,16384,0.05160426696141561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,8192,0.04347306489944458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,8192,0.03664000034332275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,7168,0.03851840098698934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,7168,0.03402986526489258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,6144,0.033843199412028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,6144,0.03134079972902934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,12288,0.03992213408152263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,5120,0.028833067417144774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,5120,0.028483200073242187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,4096,0.023753599325815836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,10240,0.0350325345993042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,4096,0.025490132967631023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,3584,0.02125119964281718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,3584,0.02396906614303589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,3072,0.01883626580238342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,8192,0.029048534234364827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,3072,0.02279040018717448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,7168,0.02550400098164876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,6144,0.022551467021306358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,2560,0.01616320013999939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,2560,0.021413334210713706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,2048,0.013209600249926248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,2048,0.02025066614151001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,5120,0.01993066668510437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,2560,0.012205866972605388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,4096,0.0166293332974116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,3584,0.015294933319091797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,2048,0.01051093339920044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,3072,0.013566933075586953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,1536,0.010728533069292705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,1536,0.01919680039087931
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,1024,0.008222933113574981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,1024,0.01658560037612915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,768,0.006920533378918965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,768,0.014973866939544677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,1536,0.009168000022570292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,512,0.004299733539422353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,512,0.015190399686495461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,256,0.0035071998834609987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,256,0.014816000064214071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,256,0.005218133330345154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,128,0.003257599969704946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,128,0.014734933773676554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,64,0.003009066730737686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,64,0.014780799547831217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,7168,32,0.0030591999491055804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,1024,0.007495466868082683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,768,0.006377600133419037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,7168,32,0.014893866578737893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,16384,0.07746666272481283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,16384,0.05448000033696493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,65536,0.2829354604085287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,65536,0.16506452560424806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,16384,0.04872746864954631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,12288,0.06137173175811768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,12288,0.04446613391240438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,12288,0.03905493418375651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,10240,0.05156693458557129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,10240,0.03940586646397908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,512,0.005789866546789805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,8192,0.040327465534210204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,8192,0.03455573320388794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,7168,128,0.004811733464399974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,6144,0.03099520007769267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,7168,0.03527359962463379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,7168,0.03278506596883138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,6144,0.029689600070317585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,65536,0.17266880671183268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,5120,0.02619626720746358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,5120,0.027163734038670857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,10240,0.03299946586290996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,8192,0.02770026723543803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,7168,0.02410986622174581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,4096,0.024477867285410564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,4096,0.024549333254496257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,6144,0.02127679983774821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,3584,0.019797333081563315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,3584,0.023931733767191567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,5120,0.01837013363838196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,3072,0.017299199104309083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,3072,0.022170666853586832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,2560,0.015089066823323569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,2560,0.021104000012079873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,2048,0.012472533186276754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,2048,0.019700266917546592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,1536,0.010196266571680705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,1536,0.018283732732137046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,1024,0.007594666878382365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,1024,0.015736533204714458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,4096,0.01565439999103546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,768,0.006491733094056447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,768,0.0153546671072642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,3584,0.014447999993960061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,512,0.003925333420435587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,512,0.01518933375676473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,3072,0.012852266430854797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,256,0.0034495999415715536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,2560,0.011720533172289532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,256,0.014801067113876343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,128,0.0032117334504922234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,2048,0.010079999764760334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,128,0.014579199751218162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,128,0.004762666424115499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,64,0.003009066730737686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,1536,0.00900266667207082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,6144,32,0.0030432000756263735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,64,0.014722133676211039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,6144,32,0.014757333199183145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,1024,0.00676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,65536,0.23474879264831544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,16384,0.06505279938379924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,65536,0.14481813112894695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,768,0.00625493327776591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,16384,0.04649600187937419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,16384,0.05036266644795736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,12288,0.05048533280690511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,12288,0.038496001561482744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,512,0.005685333410898844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,10240,0.04295573234558105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,10240,0.034689064820607504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,8192,0.03518720070521037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,8192,0.030856533845265703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,6144,256,0.005196799834569296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,7168,0.028546132644017536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,7168,0.02890133261680603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,65536,0.16556480725606282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,6144,0.025912533203760784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,6144,0.026557866732279462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,5120,0.022668800751368203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,12288,0.03541333278020223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,10240,0.03118293285369873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,8192,0.02609386642773946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,5120,0.02451200087865194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,4096,0.017884800831476845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,4096,0.022651733954747517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,3584,0.01601920028527578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,3584,0.0217141330242157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,7168,0.02286720077196757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,3072,0.01411946713924408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,3072,0.020939733584721884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,2560,0.012282666563987733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,2560,0.020115200678507486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,2048,0.010397866368293762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,2048,0.018998400370279948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,1536,0.008659199873606364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,1536,0.016457600394884746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,1536,0.008295466502507527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,1024,0.006820266445477803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,6144,0.02026559909184774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,1024,0.01548693378766378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,1024,0.006817066669464111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,768,0.005295999844868978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,768,0.015638400117556253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,5120,0.017395200332005818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,512,0.003685333331425985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,512,0.014899200201034546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,256,0.0033312000334262846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,256,0.014658133188883463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,4096,0.015106133619944253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,128,0.003138133386770884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,3584,0.014006400108337402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,128,0.014468266566594442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,64,0.002935466667016347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,64,0.014651733636856078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,5120,32,0.0029557332396507262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,5120,32,0.014679466684659323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,65536,0.19069760640462238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,65536,0.11705493132273356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,3072,0.012513066331545511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,16384,0.05121386845906576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,2560,0.011272533734639486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,16384,0.04144320090611776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,2048,0.009620267152786254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,12288,0.03989439805348714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,12288,0.03504213492075602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,10240,0.034216535091400144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,10240,0.031249066193898518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,768,0.006151466568311056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,512,0.005606399973233541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,8192,0.02834133307139079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,8192,0.02763306697209676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,256,0.005093333125114441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,5120,128,0.0046528001626332605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,7168,0.025544534126917522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,7168,0.025740800301233928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,7168,0.021011199553807577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,6144,0.02281600038210551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,6144,0.024197334051132204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,5120,0.01810986598332723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,5120,0.022769065697987874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,65536,0.14346987406412762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,4096,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,16384,0.04258133172988891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,4096,0.02116480072339376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,12288,0.03246293266614278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,3584,0.0132832000652949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,10240,0.027982934315999346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,3584,0.02076693375905355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,3072,0.011890133221944172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,3072,0.02004800041516622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,8192,0.023306665817896526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,6144,0.018807466824849448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,5120,0.015852800011634825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,2560,0.019138133525848387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,2560,0.010492799679438274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,2048,0.009089066584904989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,4096,0.013610666990280152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,2048,0.018004266421000163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,3584,0.012676266829172769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,1536,0.007683200140794118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,1536,0.015438933173815408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,1024,0.0153546671072642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,1024,0.005977599819501241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,3072,0.011565867066383361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,2560,0.010956799983978272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,768,0.003909333298603693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,768,0.015129599968592325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,512,0.0035306667288144433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,2048,0.009118933478991191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,512,0.014870400230089823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,256,0.0031690667072931922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,256,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,128,0.002948266764481862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,128,0.014618666966756186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,1536,0.007527466615041096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,64,0.0028864001234372456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,64,0.014726400375366211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,4096,32,0.0029237332443396253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,4096,32,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,65536,0.1730965296427409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,65536,0.10929280122121174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,65536,0.13109440008799236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,16384,0.05225280125935873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,16384,0.03833173513412476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,1024,0.006548266609509785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,768,0.006074666480223338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,12288,0.03593706687291463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,512,0.005410133302211762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,12288,0.032892799377441405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,256,0.005122133096059163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,4096,128,0.0047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,10240,0.030805333455403643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,10240,0.030717867612838744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,16384,0.03937173287073771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,12288,0.029981867472330732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,8192,0.02560639977455139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,8192,0.02620159983634949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,7168,0.02318613330523173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,7168,0.02444480061531067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,7168,0.01957013408342997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,6144,0.020667733748753865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,6144,0.023270400365193684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,5120,0.018066134055455527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,5120,0.02206933299700419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,4096,0.013261866569519044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,4096,0.02072640061378479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,3584,0.012071466445922852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,3584,0.019883733987808228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,10240,0.025729066133499144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,8192,0.022037333250045775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,3072,0.010859733819961548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,3072,0.01926079988479614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,2560,0.00974720021088918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,6144,0.017628800868988038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,2048,0.008400000135103862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,5120,0.015500799814860026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,2560,0.01890453298886617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,4096,0.012995200355847678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,3584,0.012301866213480632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,2560,0.01018773317337036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,2048,0.017633066574732462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,1536,0.007321600119272869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,1536,0.01619733373324076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,1024,0.004885333279768625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,1024,0.015230933825174967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,768,0.004018133382002512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,768,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,3072,0.01134933332602183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,512,0.0035573333501815797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,512,0.014903466900189719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,256,0.0031189332405726117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,256,0.014693333705266317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,128,0.002916266769170761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,128,0.014569600423177084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,2048,0.008461866776148479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,1536,0.007451733450094859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,1024,0.006218666831652323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,768,0.005764266848564148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,64,0.0027893332143624624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,512,0.005470933516820272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,64,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3584,32,0.002962133288383484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3584,32,0.014686933159828186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,65536,0.1468832015991211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,65536,0.09531733194986979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,16384,0.04184639851252238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,65536,0.1528490702311198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,128,0.00470719983180364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3584,256,0.004983466863632202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,16384,0.0366485317548116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,12288,0.032892799377441405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,12288,0.031015467643737794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,10240,0.02787733276685079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,10240,0.02810773253440857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,8192,0.023987199862798056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,8192,0.02581760088602702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,8192,0.023509333531061806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,7168,0.021401600042978922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,7168,0.023616000016530355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,7168,0.020497065782546998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,6144,0.018952532609303793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,6144,0.022418133417765298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,5120,0.016562133034070333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,5120,0.020989867051442464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,16384,0.04150720040003459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,12288,0.031167999903361006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,10240,0.02730773289998372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,6144,0.01845226685206095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,4096,0.014212266604105631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,4096,0.02032853364944458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,3584,0.01104213297367096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,5120,0.01585706671079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,3584,0.019319466749827065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,3584,0.012657066186269125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,3072,0.010055466492970785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,3072,0.01916266679763794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,2560,0.008943999807039898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,4096,0.013476266463597616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,2560,0.01745599905649821
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,2560,0.010262399911880493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,2048,0.007972266773382823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,2048,0.016511999567349753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,1536,0.006669866542021434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,3072,0.0114847997824351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,768,0.003956266740957896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,1536,0.016029866536458333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,1536,0.007530666887760162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,1024,0.004576000074545542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,1024,0.0153546671072642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,768,0.015210666259129844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,512,0.0035605333745479585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,512,0.014846932888031007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,256,0.0031968000034491217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,256,0.01474453310171763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,256,0.0048981333772341405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,128,0.0029877332349618276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,128,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,2048,0.008526933193206788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,64,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,64,0.014443733294804893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,3072,32,0.002906666696071625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,65536,0.12642239729563395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,1024,0.006237866481145223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,3072,32,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,768,0.005763199925422668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,65536,0.08289600213368734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,512,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,16384,0.03532906770706177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,16384,0.03227519989013672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,3072,128,0.004650666813055674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,12288,0.027291733026504516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,12288,0.02759679953257243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,10240,0.023949867486953734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,10240,0.025547732909520466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,8192,0.019782400131225585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,8192,0.023331199089686075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,8192,0.02082879940668742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,7168,0.017869865894317626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,7168,0.022362667322158813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,6144,0.016006400187810264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,16384,0.036962131659189865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,65536,0.13612267176310222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,6144,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,5120,0.02116159995396932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,5120,0.013870933651924133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,4096,0.011845333377520244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,4096,0.019522132476170857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,3584,0.01113706628481547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,3584,0.019151999553044637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,3072,0.010257066289583842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,3072,0.018605866034825645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,12288,0.02802346746126811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,2560,0.008322133123874665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,10240,0.024600533644358318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,2560,0.01655359963575999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,2048,0.007309866448243459
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,2048,0.016170666615168253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,7168,0.018667733669281004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,6144,0.016474666198094685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,1536,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,1536,0.01597119967142741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,5120,0.014572800199190775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,4096,0.012356266379356384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,1024,0.004162133236726125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,3584,0.0116266667842865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,1024,0.0155157337586085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,768,0.0038261334101359046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,768,0.015246933698654175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,512,0.003554133325815201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,3072,0.010371200243631999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,512,0.014961066842079162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,2560,0.009266133109728496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,2048,0.007931733131408691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,256,0.00325546662012736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,256,0.014737066626548768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,128,0.003018666555484136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,1536,0.006886399785677592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,128,0.014520532886187234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,1024,0.006126933296521505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,64,0.0028064000109831494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,768,0.005703466633955637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,64,0.014598400394121806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2560,32,0.002807466685771942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2560,32,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,65536,0.09987733364105225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,65536,0.06877760092417398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,65536,0.12746986548105876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,16384,0.028201599915822346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,16384,0.02913706700007121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,12288,0.022634667158126832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,512,0.005357866485913595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,256,0.00490880012512207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,12288,0.025054933627446492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,10240,0.019795199235280357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2560,128,0.004649599889914194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,10240,0.023655466238657632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,8192,0.016189866264661155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,8192,0.022016000747680665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,7168,0.014920533696810404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,16384,0.03660586675008138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,6144,0.020356265703837077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,7168,0.0212501327196757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,12288,0.02837973237037659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,6144,0.013262933492660523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,5120,0.011921067039171855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,5120,0.02005866765975952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,10240,0.02471253275871277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,4096,0.010457600156466167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,4096,0.018652800718943277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,3584,0.009733333190282186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,8192,0.02137920061747233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,7168,0.018666666746139527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,3584,0.017411200205485027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,3072,0.01714986761411031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,3072,0.009057066837946574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,2560,0.007618133227030437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,2560,0.016622933745384216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,6144,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,5120,0.014703999956448874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,2048,0.006728533407052357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,4096,0.012557866175969443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,2048,0.01633280018965403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,2048,0.00795306662718455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,1536,0.005096533397833506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,1536,0.01600320041179657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,3584,0.011356799801190694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,3072,0.010361599922180175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,1536,0.007398400207360585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,1024,0.0041685332854588825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,1024,0.015490133563677469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,768,0.003752533346414566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,2560,0.009082667032877604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,768,0.015032533804575601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,512,0.003432533393303553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,256,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,512,0.015072000026702882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,1024,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,256,0.0031008000175158186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,768,0.005648000041643778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,64,0.014310399691263834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,128,0.0028512001037597655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,512,0.005299200117588043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,128,0.01439573367436727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,64,0.002716800073782603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,2048,32,0.0026986666023731233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,2048,32,0.014429866274197897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,65536,0.07724800109863281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,65536,0.058882133165995276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,16384,0.023282132546106973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,256,0.0049333333969116214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,16384,0.025458133220672606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,16384,0.03059413234392802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,12288,0.018548266092936198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,12288,0.02288320064544678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,2048,128,0.00461760014295578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,10240,0.01601066688696543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,10240,0.021916800737380983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,8192,0.013165866335233053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,65536,0.10847466786702473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,12288,0.023845332860946655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,8192,0.02118933399518331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,7168,0.012437333663304646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,10240,0.02073919971783956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,7168,0.02005973259607951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,7168,0.016088533401489257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,6144,0.011404800415039062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,6144,0.019375999768575035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,5120,0.010450133681297302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,5120,0.019025067488352455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,5120,0.012665599584579468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,4096,0.009262933333714803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,8192,0.018253866831461588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,3072,0.008132266501585644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,4096,0.017460266749064125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,4096,0.01011199951171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,3584,0.008718933661778767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,3584,0.00977280040582021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,3584,0.01715839902559916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,3072,0.017014400164286295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,6144,0.01425386667251587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,2560,0.006975999971230824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,2560,0.016309332847595216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,2048,0.00556160012880961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,2048,0.015998933712641397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,1536,0.004820266862710317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,1536,0.015729066729545594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,1536,0.006600533425807953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,1024,0.0040949332217375435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,3072,0.008784000078837078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,2560,0.008150400221347808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,1024,0.015246933698654175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,2048,0.007276799778143566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,768,0.0037248000502586366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,768,0.015012266238530478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,512,0.003373866776625315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,1024,0.005684266487757364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,512,0.014857600132624308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,768,0.005377066632111868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,256,0.003127466638882955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,256,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,64,0.014552533626556396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,128,0.0028213332096735638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,32,0.0027583998938401537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,128,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,128,0.004645333190759023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,512,0.005109333495299021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1536,64,0.002833066632350286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1536,256,0.0047989333669344585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1536,32,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,65536,0.0516319990158081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,65536,0.045449598630269365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,16384,0.0162581334511439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,16384,0.02265066703160604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,16384,0.024791467189788818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,12288,0.013662933309872945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,12288,0.02080000042915344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,10240,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,10240,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,8192,0.01056106686592102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,8192,0.019630932807922365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,65536,0.09161813259124756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,12288,0.01979093352953593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,7168,0.009981866677602131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,7168,0.018541866540908815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,6144,0.008973866701126099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,6144,0.017805866400400796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,10240,0.017191465695699057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,5120,0.00864533285299937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,5120,0.018180267016092936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,4096,0.007776000102361043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,4096,0.017602133750915527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,4096,0.008810666203498841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,8192,0.014717866977055868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,3584,0.007377066711584728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,3584,0.017102932929992674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,3072,0.017087999979654947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,7168,0.012854400277137756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,3584,0.008195200065771738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,6144,0.011373866597811382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,3072,0.006585599978764851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,2560,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,2560,0.016520532965660095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,2048,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,5120,0.009868799646695455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,2048,0.016291200121243795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,1536,0.004746666550636292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,1536,0.01581546664237976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,1536,0.005992533266544342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,3072,0.007605333129564922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,2560,0.0071381335457166035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,1024,0.004084266722202301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,2048,0.006505600114663441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,1024,0.01518186628818512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,1024,0.005221333106358846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,256,0.003047466774781545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,768,0.003770666569471359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,768,0.014998400211334228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,128,0.0028586665789286296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,512,0.0033770665526390077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,512,0.014692266782124838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,256,0.004709333181381226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,256,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,128,0.014298666516939798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,64,0.002726399898529053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,64,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,1024,32,0.0026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,1024,32,0.014226133624712626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,65536,0.04083733161290486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,65536,0.03965226809183757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,16384,0.013630933562914529
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,16384,0.021086933215459187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,12288,0.01158186693986257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,12288,0.01960106690724691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,12288,0.017758933703104655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,10240,0.010705066720644633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,10240,0.018949333826700845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,10240,0.015582933028539022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,8192,0.00947093367576599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,8192,0.01825493375460307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,7168,0.008762666583061218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,7168,0.01851093371709188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,768,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,6144,0.008051200211048127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,6144,0.017939200003941856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,512,0.004924799998601278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,5120,0.0075103998184204105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,5120,0.018097066879272462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,1024,128,0.004622933268547058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,4096,0.00652159998814265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,4096,0.017473065853118898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,65536,0.07777706782023111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,3584,0.006888533135255177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,16384,0.022372267643610635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,3584,0.017151999473571777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,3072,0.00639573335647583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,3072,0.0170741339524587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,2560,0.005992533266544342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,8192,0.01338879962762197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,2560,0.01648319959640503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,2048,0.005301333467165629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,7168,0.011925333738327026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,2048,0.015995732943216958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,1536,0.004661333560943603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,1536,0.015650133291880287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,6144,0.010570666193962098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,1024,0.004026666780312856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,1024,0.015587199727694193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,5120,0.009609599908192951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,768,0.003692800054947535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,768,0.015117866794268289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,4096,0.008550399541854858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,512,0.0033930666744709016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,512,0.014757333199183145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,3584,0.008039466540018718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,256,0.003054933249950409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,256,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,3072,0.007502933343251546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,128,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,128,0.014586666226387024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,2560,0.006883200009663899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,64,0.0026464000344276427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,2048,0.0062837332487106325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,64,0.014267733693122864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,768,32,0.0026549334327379864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,768,32,0.014320000012715658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,65536,0.029500800371170043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,1536,0.005830400188763936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,65536,0.03402239878972371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,16384,0.010735999544461567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,16384,0.01996906598409017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,12288,0.009361066420873006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,12288,0.018074667453765868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,1024,0.005334400137265523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,10240,0.008772266904513042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,10240,0.018272000551223754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,768,0.004955733319123586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,8192,0.007980800171693166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,8192,0.017633066574732462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,512,0.004836266736189524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,7168,0.006961066524187725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,7168,0.018237866957982383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,7168,0.012010666728019714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,6144,0.00639466643333435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,6144,0.017847466468811034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,256,0.0047082667549451195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,5120,0.006438399851322174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,768,128,0.004532266656557719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,5120,0.018039466937383015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,4096,0.006007466713587443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,4096,0.017244799931844076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,65536,0.07544213136037191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,16384,0.022215465704600014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,3584,0.006520533561706543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,3584,0.017064533631006875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,3072,0.006150400141874949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,3072,0.017230933904647826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,12288,0.017115734020868936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,2560,0.0059797331690788266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,10240,0.014841600259145101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,2560,0.016410666704177856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,8192,0.012689066926638284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,2048,0.005314133564631144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,2048,0.015874133507410685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,2048,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,1536,0.004753066599369049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,1536,0.015698132912317912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,6144,0.010583466291427613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,1024,0.004070399949947992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,1024,0.016275200247764587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,5120,0.009559466441472372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,768,0.003737599899371465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,4096,0.008423466483751934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,768,0.015035733580589294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,512,0.003443199892838796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,3584,0.00811413327852885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,512,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,256,0.003083733220895131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,3072,0.0073642666141192125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,256,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,128,0.0029045333464940387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,128,0.01460906664530436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,128,0.004524800181388855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,64,0.0027647999425729113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,64,0.014538666605949402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,512,32,0.0026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,2560,0.006931200126806895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,512,32,0.01461120049158732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,65536,0.018613332509994508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,65536,0.02640320062637329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,16384,0.007762133578459422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,16384,0.01880426605542501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,12288,0.006626133124033611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,12288,0.0179967999458313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,1536,0.005973333120346069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,10240,0.0065098668138186145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,10240,0.018659200270970663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,1024,0.005333333214124044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,8192,0.006390400230884552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,8192,0.01773759921391805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,8192,0.01260586678981781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,7168,0.00622080018122991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,7168,0.01817493240038554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,768,0.004986666639645894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,6144,0.005992533266544342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,6144,0.017748266458511353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,512,0.004693333307902018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,6144,0.010487467050552368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,5120,0.006434133152167003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,512,256,0.004612266520659129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,5120,0.018003199497858682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,4096,0.005985066791375478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,65536,0.07454933325449625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,16384,0.021268266439437866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,12288,0.016746666034062704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,4096,0.017834667364756265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,3584,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,3584,0.017025067408879598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,3584,0.007906133433183034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,3072,0.006099199752012888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,10240,0.014703999956448874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,3072,0.0166293332974116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,2560,0.006043733159701029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,2560,0.016476800044377647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,2048,0.005409066875775656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,2048,0.016119466225306193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,1536,0.004693333307902018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,7168,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,1536,0.005774933099746704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,1536,0.01576746702194214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,1024,0.004051200052102407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,1024,0.005209599932034811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,1024,0.015464533368746439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,768,0.0036256000399589538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,5120,0.009588266412417095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,512,0.003319466610749563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,768,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,512,0.014659200112024942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,256,0.0029845332105954488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,256,0.004535466432571411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,256,0.01454080045223236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,128,0.0028384000062942503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,128,0.014413866400718688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,64,0.002656000107526779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,4096,0.008449066678682964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,64,0.01430400013923645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,256,32,0.0026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,256,32,0.014215466380119324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,65536,0.011782399813334147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,65536,0.02378666599591573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,16384,0.006230400005976359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,16384,0.018161066373189292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,3072,0.007367466886838277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,16384,0.02135253349939982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,2560,0.006937600175539653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,2048,0.006192000210285186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,12288,0.006030933558940887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,12288,0.01810773412386576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,128,0.00444160004456838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,768,0.0049674664934476215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,10240,0.006183466811974844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,10240,0.01827733318010966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,256,512,0.004799999793370565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,8192,0.00617386649052302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,8192,0.01789120038350423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,65536,0.07402133146921794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,7168,0.005986133217811584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,7168,0.018323200941085815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,6144,0.005871999760468801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,12288,0.01680213411649068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,6144,0.018100267648696898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,5120,0.006200533111890157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,5120,0.018029866615931193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,4096,0.005780266722043356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,4096,0.017374932765960693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,3584,0.0062613333264986675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,3584,0.017534933487574258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,10240,0.014755200346310934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,3072,0.005896533528963724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,3072,0.01726933320363363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,2560,0.005981866518656413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,2560,0.01625706652800242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,8192,0.012604799866676331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,2048,0.005312000215053558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,7168,0.011636267105738323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,2048,0.015946666399637856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,1536,0.004648533463478088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,1536,0.015597866972287497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,6144,0.010568533341089885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,1024,0.004020266731580098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,1024,0.015359999736150107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,5120,0.009451733032862345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,768,0.003668266783157984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,4096,0.008400000135103862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,768,0.014811733365058899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,768,0.005031466484069824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,512,0.0033546666304270422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,3584,0.008126933375994365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,512,0.01478506624698639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,512,0.004804266492525736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,256,0.0030239999294281008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,256,0.014517333110173544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,3072,0.007369600236415863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,128,0.0027882667879263563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,2560,0.006867200136184692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,128,0.014444800217946372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,2048,0.006133333345254262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,1536,0.005862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,64,0.0026634665826956432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,64,0.014270933469136557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,128,32,0.002682666728893916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,128,32,0.014493866761525472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,1024,0.0052490666508674625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,65536,0.009154133001963298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,65536,0.0233130673567454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,256,0.004532266656557719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,16384,0.006083199878533682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,16384,0.018361600240071614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,12288,0.006004266440868378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,32,128,128,0.004518400132656098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,12288,0.01821440060933431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,10240,0.006121600170930227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,10240,0.0180351992448171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,8192,0.00613013356924057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,8192,0.017568000157674155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,7168,0.0058794667323430385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,7168,0.01816426714261373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,6144,0.0056970665852228795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,6144,0.017926400899887084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,5120,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,5120,0.0182751993338267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,4096,0.005799466868241628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,4096,0.01732800006866455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,3584,0.006113066772619883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,3584,0.01722453236579895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,3072,0.005866666634877523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,3072,0.016786134243011473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,2560,0.005915733178456625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,2560,0.016313599546750386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,2048,0.005256533126036326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,2048,0.016285866498947144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,1536,0.004650666813055674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,1536,0.016269866625467935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,1024,0.003958400090535482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,768,0.003636266787846883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,1024,0.015284267067909241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,768,0.014985600113868713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,512,0.003305600086847941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,512,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,256,0.0029493334392706556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,256,0.014563199877738953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,128,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,128,0.014350933829943338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,64,0.0027488000690937043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,64,0.014518400033315023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,64,32,0.0026975999275843303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,64,32,0.014381866653760275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,12288,0.018055466810862224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,65536,0.008694400389989216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,65536,0.022363734245300294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,16384,0.006422399977842967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,16384,0.01846826672554016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,7168,0.00581226646900177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,12288,0.006022400160630544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,10240,0.006196266909440359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,8192,0.005963733295599619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,10240,0.018143999576568603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,8192,0.01805013418197632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,6144,0.005690666536490122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,7168,0.018161066373189292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,6144,0.017529600858688356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,5120,0.006187733511130015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,5120,0.01801066597302755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,4096,0.005692799886067709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,4096,0.017511467138926186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,3584,0.00613013356924057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,3584,0.017102932929992674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,3072,0.005868799984455109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,3072,0.016541866461435954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,2560,0.005919999877611796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,2560,0.016587733229001363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,1024,0.015261866648991904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,2048,0.005329066514968872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,2048,0.015851733088493348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,1536,0.0046079998215039575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,1536,0.01552959978580475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,1024,0.0040501333773136135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,768,0.0035648000737031303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,512,0.003340800106525421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,768,0.01493013302485148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,512,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,256,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,256,0.01469546655813853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,128,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,128,0.01446613371372223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,64,0.0026677332818508146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,64,0.014247467120488485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,32,32,32,0.0026975999275843303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,32,32,32,0.014459733168284097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,16384,0.3927040100097656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,12288,0.5712938944498698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,16384,0.7480181376139323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,12288,0.2985866546630859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,10240,0.251257594426473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,10240,0.47414080301920575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,8192,0.38046401341756186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,8192,0.2231658617655436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,7168,0.18063999811808268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,7168,0.33085225423177084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,6144,0.28564478556315104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,6144,0.1578719933827718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,5120,0.23844693501790365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,5120,0.13395519256591798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,4096,0.1919914722442627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,4096,0.11064213116963703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,16384,0.3578485488891602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,12288,0.2697941462198893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,3584,0.16839040120442708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,3584,0.09926400184631348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,10240,0.2261184056599935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,3072,0.14556585947672526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,8192,0.18201707204182943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,6144,0.14202346801757812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,7168,0.1668000062306722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,3072,0.09538239638010661
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,2560,0.0756341298421224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,2560,0.12771519819895427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,5120,0.1168394645055135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,4096,0.09476053714752197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,2048,0.09782719612121582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,2048,0.06378879944483438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,1536,0.07686613400777181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,1536,0.052391465504964194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,1024,0.05117760101954142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,1024,0.040321067969004316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,768,0.040226133664449056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,768,0.03464853366216024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,512,0.027830400069554645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,512,0.027914667129516603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,3584,0.08421866893768311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,256,0.01778879960378011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,256,0.021320533752441407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,3072,0.07310293515523275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,128,0.010285866260528565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,128,0.018198400735855103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,2560,0.07068266868591308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,2048,0.051336534818013514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,64,0.006974933544794719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,65536,32,0.004727466901143392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,64,0.016376533110936484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,65536,32,0.016407466928164163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,768,0.023348265886306764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,512,0.01738133430480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,1024,0.029158399502436323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,1536,0.04043200016021729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,65536,0.38111893335978186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,16384,0.10776106516520183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,65536,0.7432490666707356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,16384,0.183733336130778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,12288,0.08488960266113281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,12288,0.1543989340464274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,10240,0.11618133385976155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,10240,0.07313386599222818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,8192,0.09488106568654378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,8192,0.06203413407007853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,256,0.012128000458081562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,7168,0.0828266700108846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,7168,0.05638186534245809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,65536,128,0.00965226689974467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,6144,0.07219200134277344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,6144,0.05066026846567789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,5120,0.06047893365224203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,5120,0.045057066281636554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,16384,0.09482133388519287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,12288,0.08433600266774496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,65536,0.3555189450581869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,4096,0.04920533498128255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,4096,0.04221013387044271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,3584,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,8192,0.05032853285471598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,10240,0.06150506734848023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,3584,0.03580586512883504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,3072,0.03805439869562785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,2560,0.032279467582702635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,3072,0.03338666756947835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,7168,0.044897067546844485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,2560,0.0300437331199646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,6144,0.03948906660079956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,2048,0.026307199398676557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,2048,0.026663466294606523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,1536,0.020835200945536293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,1536,0.023590399821599325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,1024,0.014621866742769876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,1024,0.020755199591318767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,5120,0.03378880023956299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,768,0.011493333180745443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,768,0.019288533926010133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,4096,0.03185706734657288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,512,0.008694400389989216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,512,0.017537067333857216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,3072,0.02260479927062988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,256,0.00581333339214325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,256,0.015089066823323569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,3584,0.025545599063237508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,128,0.003437866767247518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,128,0.015076266725858054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,2560,0.019913599888483683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,64,0.003239466746648153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,64,0.015121066570281982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,16384,32,0.003289599965016047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,16384,32,0.015134933590888976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,2048,0.01668693423271179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,1024,0.010641066233317058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,768,0.009213866790135701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,65536,0.5657887776692708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,1536,0.013804800311724343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,65536,0.29901971817016604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,16384,0.14534719785054523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,16384,0.1007263978322347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,12288,0.11200640201568604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,12288,0.06840426921844482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,10240,0.09382399717966715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,10240,0.05952426592508951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,8192,0.07637226581573486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,8192,0.050611201922098795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,512,0.007627733548482259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,7168,0.06674133141835531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,7168,0.04597226778666179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,256,0.005323733389377594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,6144,0.0555402676264445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,6144,0.04153173367182414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,16384,128,0.004894933104515076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,5120,0.04643306732177734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,5120,0.03768213192621867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,65536,0.27801173528035483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,16384,0.0756704012552897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,4096,0.043112532297770186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,4096,0.032961066563924155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,12288,0.058317867914835606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,3584,0.03363946676254272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,3584,0.0305184006690979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,10240,0.04937386512756348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,3072,0.031617067257563275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,3072,0.027899734179178876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,2560,0.025062400102615356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,8192,0.04066453377405803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,2560,0.02573973337809245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,2048,0.02063573400179545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,2048,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,7168,0.036245334148406985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,1536,0.01616213321685791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,1536,0.02167146603266398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,6144,0.03190186619758606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,1024,0.011588266491889954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,1024,0.019374932845433554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,5120,0.027533866961797077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,768,0.010123733679453533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,768,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,4096,0.02312320073445638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,3072,0.018976000944773357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,512,0.007551999886830647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,512,0.015561599532763162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,2048,0.013770666718482972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,256,0.003942399968703588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,3584,0.02129279971122742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,256,0.015041066209475198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,128,0.0034976000587145484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,128,0.014879999558130899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,2560,0.016812799374262492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,64,0.003257599969704946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,64,0.014934399724006652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,12288,32,0.003190399954716364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,12288,32,0.014888532956441245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,1536,0.011770666639010111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,1024,0.009332266449928284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,768,0.008266666531562805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,65536,0.4820906639099121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,16384,0.12522133191426593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,16384,0.08663679758707682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,65536,0.2607413291931152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,12288,0.09565333525339761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,12288,0.06104746659596762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,10240,0.08054080009460449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,10240,0.053435734907786046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,8192,0.06743679841359457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,8192,0.045476265748341876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,512,0.005714133381843567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,256,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,12288,128,0.005021866659323374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,7168,0.05321173270543417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,7168,0.040965334574381514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,6144,0.04633813301722209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,6144,0.03751573165257772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,65536,0.23925226529439292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,16384,0.06441066662470499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,5120,0.039288532733917234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,5120,0.036298668384552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,4096,0.03192746639251709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,12288,0.05717333157857259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,4096,0.029629866282145183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,3584,0.028424533208211263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,3584,0.027772800127665205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,10240,0.041782399018605546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,3072,0.025015467405319215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,3072,0.025835732618967693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,2560,0.0213045338789622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,2560,0.02418346603711446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,8192,0.0346560001373291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,2048,0.017972266674041747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,2048,0.022310400009155275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,7168,0.03105386694272359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,1536,0.013806933164596557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,1536,0.020525866746902467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,6144,0.028068266312281293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,1024,0.010186666250228881
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,1024,0.01863893270492554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,5120,0.02400746742884318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,768,0.008472533027331034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,768,0.016745599110921223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,4096,0.020627200603485107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,3072,0.016638933618863424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,512,0.006727466483910878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,512,0.015202132860819497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,3584,0.01886826753616333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,2048,0.01260586678981781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,2560,0.014603733023007711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,256,0.0035189333061377203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,256,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,1536,0.010966400305430096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,128,0.003286399940649668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,1024,0.008886399865150451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,128,0.014763733744621277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,64,0.003018666555484136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,64,0.014884266257286071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,10240,32,0.0030421334008375804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,768,0.00689279983441035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,10240,32,0.014760532975196838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,65536,0.20134612719217934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,65536,0.37976214090983074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,16384,0.09252479871114096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,16384,0.06109013160069784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,12288,0.08324053287506103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,256,0.005224533379077911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,10240,0.059577600161234534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,512,0.005524266759554545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,12288,0.05049066543579102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,10240,0.04427733421325684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,8192,0.048317865530649824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,8192,0.03849386771519979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,7168,0.04258453448613485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,7168,0.035582931836446126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,6144,0.03731199900309245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,6144,0.032679466406504314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,10240,128,0.004903466502825419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,5120,0.03173546592394511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,5120,0.02985919912656148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,16384,0.05181866486867269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,65536,0.18639465967814128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,4096,0.025844266017278034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,4096,0.028809599081675213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,12288,0.040201600392659506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,3584,0.023066665728886923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,10240,0.034305067857106526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,3584,0.024980266888936363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,3072,0.020346667369206747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,3072,0.023545600970586143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,8192,0.02884053389231364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,2560,0.017529600858688356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,2560,0.02222933371861776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,7168,0.02593280076980591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,6144,0.023111466566721597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,2048,0.014355199535687766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,2048,0.02072746753692627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,1536,0.011559466520945232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,1536,0.01915093262990316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,5120,0.020197333892186482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,1024,0.008590933680534363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,1024,0.01769599914550781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,768,0.007229866584142049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,768,0.015291733543078103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,4096,0.017395200332005818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,512,0.005757866799831391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,512,0.01520746648311615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,3072,0.013704533378283182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,256,0.003554133325815201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,3584,0.015657599767049155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,256,0.014941866199175516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,2048,0.01095360020796458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,128,0.003107200066248576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,128,0.01472106675306956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,64,0.0030048000315825146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,64,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,2560,0.01244586706161499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,8192,32,0.003081600119670232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,8192,32,0.014842666188875833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,1536,0.009873066345850627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,1024,0.007892266909281413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,65536,0.3281536102294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,65536,0.18747307459513346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,768,0.006060799956321717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,16384,0.09507839679718018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,16384,0.06265173355738321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,512,0.005576533575852713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,256,0.005211733281612396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,8192,128,0.0049098665515581764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,12288,0.0630399982134501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,12288,0.04668266773223877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,10240,0.05566293398539225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,10240,0.04182826677958171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,8192,0.04322559833526611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,8192,0.036085331439971925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,7168,0.03892159859339396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,7168,0.03333546717961629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,6144,0.03414933284123738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,6144,0.030880000193913775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,5120,0.02844480077425639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,5120,0.02792746623357137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,65536,0.16483306884765625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,4096,0.026423466205596925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,4096,0.025561600923538208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,16384,0.04681599934895833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,3584,0.021005866924921672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,3584,0.024046933650970458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,12288,0.036562132835388186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,3072,0.018682666619618735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,10240,0.03127999901771546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,3072,0.022753065824508666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,2560,0.015995732943216958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,8192,0.02608533302942912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,2560,0.02203413248062134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,2048,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,2048,0.02020053267478943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,7168,0.023450666666030885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,1536,0.010777599612871806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,1536,0.018910932540893554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,6144,0.020933334032694498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,1024,0.008181333541870117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,1024,0.01580693324406942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,5120,0.018402133385340372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,768,0.006811733543872833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,768,0.014888532956441245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,4096,0.015261866648991904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,512,0.004339199761549632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,512,0.015250133474667868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,3584,0.014121599992116294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,256,0.003370666752258936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,256,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,3072,0.012738133470217386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,128,0.003127466638882955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,128,0.014732799927393594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,2048,0.010360532999038696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,64,0.00296426663796107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,2560,0.011889066298802693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,64,0.014672000209490457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,7168,32,0.0029311999678611755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,1536,0.009231999516487122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,7168,32,0.014680533607800802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,65536,0.15964479446411134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,65536,0.2824074745178223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,1024,0.006680533289909363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,16384,0.07612053553263345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,768,0.005895466605822245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,16384,0.05227200190226237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,512,0.0055071999629338585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,12288,0.05834240118662516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,12288,0.04305493434270223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,10240,0.04997333288192749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,10240,0.038344534238179524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,8192,0.039529601732889816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,8192,0.03403520186742147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,256,0.0051925331354141235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,7168,0.03450773159662883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,7168,0.032096000512441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,7168,128,0.004860800007979075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,6144,0.030371199051539104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,6144,0.02993920048077901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,5120,0.0258026659488678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,5120,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,65536,0.1439093271891276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,4096,0.024281599124272666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,4096,0.02435200015703837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,16384,0.04118826786677043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,3584,0.019250132640202842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,3584,0.023661865790685018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,12288,0.03265173236529033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,3072,0.01699413259824117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,3072,0.022062933444976805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,10240,0.02797973354657491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,2560,0.014686933159828186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,2560,0.02058560053507487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,8192,0.02335360050201416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,2048,0.012482133507728577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,2048,0.01949013272921244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,7168,0.021192532777786256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,1536,0.010003200173377991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,1536,0.018321067094802856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,6144,0.01900906761487325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,1024,0.007518933216730754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,1024,0.015338666240374246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,5120,0.016497066617012023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,768,0.006359466910362243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,768,0.015108266472816467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,4096,0.014029866456985474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,512,0.00383146678407987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,512,0.014917332927385965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,3072,0.011912533640861511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,3584,0.013112533092498779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,256,0.00327360009153684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,256,0.01476693352063497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,2048,0.009648000200589497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,128,0.0030389333764712016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,128,0.014702933033307395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,64,0.0028575999041398365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,2560,0.01092693308989207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,64,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,6144,32,0.002850133428970973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,6144,32,0.014566399653752646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,65536,0.24224747021993004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,1024,0.006300800045331319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,65536,0.14018239974975585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,1536,0.00869546631971995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,16384,0.06396693388621012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,768,0.005716266731421152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,512,0.005447466671466827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,16384,0.04585173527399699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,128,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,6144,256,0.004981333514054617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,12288,0.04902400175730388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,12288,0.03808533350626628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,10240,0.04166080156962077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,65536,0.12532052993774415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,10240,0.03367253144582112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,8192,0.03377066850662232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,16384,0.03627626498540242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,8192,0.030419200658798218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,12288,0.028935466210047407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,7168,0.0317738672097524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,7168,0.028675200541814168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,6144,0.025565866629282636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,10240,0.024795732895533242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,6144,0.026231465737024943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,5120,0.02141759991645813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,8192,0.020498132705688475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,5120,0.0243562658627828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,4096,0.017798399925231932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,4096,0.02254293362299601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,3584,0.01595200002193451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,3584,0.021603200833002725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,3072,0.014059733351071677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,7168,0.018953599532445273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,3072,0.020756266514460244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,2560,0.012330666184425354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,2560,0.01950506567955017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,2048,0.010478933652242024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,6144,0.016821332772572837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,5120,0.014801067113876343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,2048,0.01850879987080892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,1536,0.008733866612116496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,3584,0.01209920048713684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,1536,0.017221333583196004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,4096,0.012947199741999307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,1024,0.006878933310508728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,1024,0.016131200393040977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,3072,0.010964266459147136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,2560,0.010359467069307963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,768,0.005226666728655497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,768,0.0151829332113266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,512,0.0036159999668598174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,2048,0.009091200431187947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,512,0.015016532937685647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,256,0.0032821332414944967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,128,0.0029781334102153777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,256,0.01458346645037333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,1536,0.007712000111738841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,128,0.014595199624697366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,64,0.0028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,1024,0.00606826643149058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,768,0.005605333546797434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,64,0.014485333363215128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,5120,32,0.0029045333464940387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,5120,32,0.014519466956456503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,65536,0.18617812792460126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,65536,0.11209279696146648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,16384,0.05035306612650553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,16384,0.03979626496632894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,512,0.005358933409055074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,256,0.005049600203831991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,12288,0.03938773473103841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,12288,0.03406293392181396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,5120,128,0.004849066833655039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,10240,0.033763198057810466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,10240,0.030845866600672407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,8192,0.02775786717732747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,8192,0.027611732482910156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,65536,0.10802559852600098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,16384,0.03594559828440348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,7168,0.025253333648045856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,7168,0.025978666543960572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,6144,0.02232746680577596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,12288,0.025254400571187337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,6144,0.02408000032107035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,5120,0.0180842657883962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,5120,0.022530132532119752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,4096,0.01601920028527578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,10240,0.021913599967956544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,4096,0.021206400791803994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,3584,0.014455466469128927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,3584,0.020498132705688475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,8192,0.018285866578420004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,7168,0.01687893271446228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,3072,0.011868799726168316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,3072,0.01983039975166321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,6144,0.015052800377209982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,5120,0.013430399696032205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,2560,0.010470400253931682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,2560,0.018605866034825645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,2048,0.009052800138791402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,4096,0.011741866668065388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,2048,0.017704532543818156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,1536,0.007500799993673961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,1536,0.015588266650835672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,1024,0.006022400160630544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,1024,0.015266133348147073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,3584,0.011170132954915365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,768,0.00386559988061587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,3072,0.010181333621342976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,768,0.015066666404406228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,512,0.014738133549690247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,512,0.0034805332620938623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,2560,0.00965013305346171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,2048,0.008135466774304708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,256,0.003188266605138779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,1536,0.0068234667181968685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,1024,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,256,0.014615466197331747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,768,0.005493333439032236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,512,0.00514986664056778
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,128,0.0029845332105954488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,128,0.01454080045223236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,65536,0.17266880671183268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,64,0.002807466685771942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,64,0.014588800072669984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,4096,32,0.0029994666576385496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,4096,32,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,256,0.004946133494377137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,65536,0.10403199990590413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,12288,0.03490453163782756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,4096,128,0.004748799900213877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,16384,0.04506133397420247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,10240,0.02879253427187602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,16384,0.03691200017929077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,12288,0.03190400004386902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,10240,0.030039467414220172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,8192,0.025066665808359784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,8192,0.026101332902908326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,7168,0.022762666145960488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,65536,0.09869440396626791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,6144,0.020182400941848755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,7168,0.024528000752131143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,6144,0.023385600248972575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,5120,0.017629865805308023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,5120,0.022130133708318074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,16384,0.03026240070660909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,4096,0.01470186710357666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,4096,0.02029333313306173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,12288,0.024089600642522177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,3584,0.013155200084050498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,3584,0.019656533002853395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,10240,0.021109332640965782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,3072,0.010889599720637005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,3072,0.019432532787322997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,8192,0.017030400037765504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,2560,0.009763200084368389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,2560,0.01845653255780538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,2048,0.008627200126647949
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,2048,0.017539199193318686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,7168,0.016461867094039916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,1536,0.007187200089295705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,1536,0.01562666694323222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,6144,0.014670933286348978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,1024,0.004654933512210846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,1024,0.015405866503715514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,5120,0.012730666995048523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,768,0.0038922667503356934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,768,0.015001599987347921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,4096,0.011542399724324543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,512,0.003572266548871994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,512,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,3584,0.011041067043940226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,256,0.0031871999303499854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,256,0.014441600441932679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,3072,0.010150399804115296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,128,0.0029397333661715193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,2048,0.007698133091131846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,128,0.014601600170135499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,2560,0.00927786628405253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,64,0.002846933404604594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,1024,0.0060576001803080235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,1536,0.0067562664548556015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,64,0.014734933773676554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3584,32,0.002829866607983907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,768,0.005560533205668131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3584,32,0.014447999993960061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,65536,0.14585173924763997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,512,0.005347200234731038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,65536,0.09117440382639566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,256,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3584,128,0.0049002667268117275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,16384,0.040403199195861814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,16384,0.034883201122283936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,12288,0.03192960023880005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,12288,0.03015039960543315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,10240,0.02728640039761861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,10240,0.0274944007396698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,65536,0.09556907018025716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,8192,0.023396267493565878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,8192,0.025224532683690386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,16384,0.030842665831247968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,7168,0.020858667294184365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,7168,0.023808000485102336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,6144,0.01859626571337382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,6144,0.022615466515223184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,5120,0.01606826682885488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,12288,0.022357332706451415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,5120,0.021387734015782676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,4096,0.013913599650065103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,4096,0.019718400637308755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,10240,0.019709867238998414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,3584,0.01276586651802063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,3584,0.019181867440541588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,8192,0.015821866194407144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,7168,0.015294933319091797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,3072,0.011490133404731751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,6144,0.013618133465449014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,3072,0.01853440006573995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,2560,0.009005866448084513
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,2560,0.017774933576583864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,5120,0.012652800480524699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,2048,0.007830399771531422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,2048,0.016532267133394875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,1536,0.006631466746330261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,4096,0.011125333110491435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,1536,0.016167466839154564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,1024,0.004147200038035711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,3072,0.009527466694513957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,768,0.0038581334054470064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,3584,0.010506666700045268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,1024,0.015356799960136414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,2560,0.008410666386286418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,512,0.014882133404413859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,768,0.015126400192578635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,256,0.0032159999012947083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,2048,0.007418666779994964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,512,0.0034495999415715536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,256,0.014689067006111145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,1536,0.006595199803511302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,128,0.0029535998900731405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,128,0.01444906691710154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,64,0.0028319999575614927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,1024,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,768,0.005510400235652924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,32,0.014424533645311991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,3072,64,0.014578133821487427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,3072,32,0.0027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,512,0.005253333350022634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,65536,0.1220970630645752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,65536,0.07849813302357991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,256,0.005031466484069824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,3072,128,0.004880000154177348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,16384,0.033556266625722246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,16384,0.03141760031382243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,12288,0.02680533329645793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,12288,0.02679786682128906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,10240,0.02334400018056234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,10240,0.02486720085144043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,8192,0.0194432000319163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,65536,0.09340799649556478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,8192,0.02286826570828756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,7168,0.017491199572881064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,7168,0.022562134265899658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,16384,0.02598186731338501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,6144,0.015492266416549683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,6144,0.021109332640965782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,5120,0.01359999974568685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,12288,0.022124799092610677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,5120,0.02047146757443746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,10240,0.01918399930000305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,4096,0.011780266960461933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,4096,0.01930346687634786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,3584,0.011020800471305848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,8192,0.015459199746449789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,3584,0.018812799453735353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,3072,0.010121599833170573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,7168,0.015077333648999533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,3072,0.017849600315093993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,6144,0.013864533106486002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,2560,0.008412800232569377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,2560,0.016570666432380678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,5120,0.012162133057912191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,2048,0.007212799787521362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,2048,0.016457600394884746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,1536,0.005703466633955637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,1536,0.015588266650835672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,4096,0.010785067081451416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,1024,0.004408533374468485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,3584,0.010540800293286641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,1024,0.015442132949829102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,3072,0.0086816002925237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,768,0.0038015998899936674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,768,0.015056000153223673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,512,0.0034677334129810332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,512,0.014800000190734863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,256,0.0031680000325044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,2560,0.00795413355032603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,256,0.014538666605949402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,2048,0.007057066758473713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,128,0.014457600315411887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,128,0.0029472000896930695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,64,0.0027797333896160126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,1536,0.006629333396752675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,1024,0.00584853341182073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,768,0.005584000051021576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,512,0.005446400245030721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2560,32,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,65536,0.06482346852620444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,64,0.014525866508483887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2560,32,0.014504533012708029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,65536,0.09715413252512614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,256,0.005060266455014547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,16384,0.027727999289830524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,16384,0.027937066555023194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2560,128,0.0048991998036702475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,12288,0.02221226692199707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,12288,0.024321067333221435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,10240,0.019348265727361043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,10240,0.023139200607935586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,8192,0.021565866470336915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,8192,0.01594239970048269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,7168,0.014362667004267374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,7168,0.020962133010228475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,65536,0.08618773619333903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,6144,0.012955733140309653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,6144,0.02029119928677877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,16384,0.02513706684112549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,5120,0.011763200163841248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,5120,0.019564799467722573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,4096,0.010198400417963664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,4096,0.018068265914916993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,12288,0.0201365331808726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,10240,0.01794346570968628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,8192,0.015330132842063905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,3584,0.009597866733868917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,7168,0.014380799730618796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,3584,0.018054399887720743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,6144,0.01311360001564026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,3072,0.008989866574605305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,5120,0.011572266618410747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,3072,0.01696959932645162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,2560,0.007707733412583668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,4096,0.010358400146166484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,2560,0.016621866822242738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,2048,0.006786133348941803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,2048,0.01572266618410746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,1536,0.005141333242257436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,3584,0.009753599762916565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,1536,0.015982932845751443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,3072,0.00871573289235433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,1024,0.004149333387613296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,1024,0.015069866180419922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,2560,0.008011733492215473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,2048,0.006951466699441274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,768,0.0038015998899936674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,768,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,512,0.0035071998834609987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,1024,0.005797333518664042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,512,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,256,0.003068800022204717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,1536,0.006499200065930684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,256,0.014684800306955972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,128,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,64,0.014640000462532044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,128,0.01442026694615682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,64,0.0026677332818508146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,768,0.005502933263778686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,512,0.0052821333209673565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,2048,32,0.0027285332481066385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,16384,0.02248319983482361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,2048,32,0.01439466675122579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,256,0.0049002667268117275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,65536,0.07512533664703369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,65536,0.054832001527150474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,2048,128,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,16384,0.025063467025756837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,12288,0.018152532974878947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,12288,0.022643200556437173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,65536,0.08530666828155517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,10240,0.01572053333123525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,10240,0.021638399362564086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,8192,0.013277866442998252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,16384,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,8192,0.020116267601648967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,7168,0.012312533458073933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,7168,0.019831466674804687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,12288,0.020012799898783365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,6144,0.011318399508794149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,6144,0.01932586630185445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,10240,0.01770240068435669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,5120,0.010216533144315084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,5120,0.019214934110641478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,8192,0.015346133708953857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,4096,0.008958933750788371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,4096,0.01758613387743632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,7168,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,3584,0.008633599678675333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,3584,0.017474132776260375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,6144,0.012625066439310708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,3072,0.008054399987061818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,3072,0.016641066471735636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,5120,0.011329066753387452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,2560,0.0070816000302632645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,2560,0.016264533003171287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,4096,0.00944213370482127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,2048,0.005443199972311656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,2048,0.015944533546765647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,1536,0.004682666560014089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,1536,0.01569386621316274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,3584,0.009097599983215332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,3072,0.008116266628106435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,1024,0.004030933231115341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,1024,0.015190399686495461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,2560,0.007773866752783458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,768,0.0038090666135152185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,768,0.015082666277885437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,2048,0.006932266553243001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,512,0.0034122665723164878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,512,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,1536,0.006449066599210103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,1024,0.005880533158779145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,256,0.0030570665995279947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,256,0.014664533734321594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,768,0.005520000060399374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,128,0.002917333443959554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,32,0.0026837334036827086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,512,0.005109333495299021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,128,0.014587733149528503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1536,64,0.002746666719516118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,64,0.014670933286348978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1536,32,0.01442026694615682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,65536,0.05053546826044718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,256,0.0048885335524876915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,65536,0.04228800137837728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1536,128,0.004779733220736186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,16384,0.01597760021686554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,16384,0.022123734156290688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,12288,0.013477333386739097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,12288,0.019951999187469482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,10240,0.012129066387812297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,10240,0.019595734278361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,8192,0.010481066505114238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,8192,0.01876266598701477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,7168,0.009822932879130046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,65536,0.08362987041473388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,7168,0.018233599265416463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,6144,0.00885973374048869
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,6144,0.01767573356628418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,16384,0.024980266888936363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,5120,0.00843519965807597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,12288,0.019947733481725058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,5120,0.017930666605631508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,4096,0.007716266810894013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,10240,0.01770346760749817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,4096,0.01738133430480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,8192,0.01511253317197164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,3584,0.007074133555094402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,3584,0.016953599452972413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,7168,0.013693867127100625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,6144,0.011884799599647522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,3072,0.0065184002121289565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,5120,0.01074026624361674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,3072,0.016515200336774193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,2560,0.006038400034109751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,2560,0.016387200355529784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,4096,0.009336533149083455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,2048,0.0054517333706219995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,2048,0.01683733264605204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,3584,0.009008000294367473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,3072,0.008108800152937572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,1536,0.004692266881465912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,1536,0.015570132931073507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,1024,0.004277333120505015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,2560,0.00831573357184728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,1024,0.0151146670182546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,768,0.0036960000793139136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,768,0.014932266871134438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,2048,0.006843733290831248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,512,0.0034527999659379324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,256,0.003099733342727025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,512,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,256,0.014724266529083253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,128,0.0028757333755493166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,1024,0.005884799857934316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,768,0.005545599758625031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,128,0.014550399780273438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,1536,0.006488533318042755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,32,0.014482133587201438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,64,0.002677333354949951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,512,0.005171200136343638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,65536,0.03661226828893026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,1024,64,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,256,0.0049333333969116214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,1024,32,0.0026346666117509207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,1024,128,0.004756266872088114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,65536,0.03978026707967122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,16384,0.013433600465456644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,16384,0.020528000593185425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,12288,0.011532800396283467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,12288,0.018913066387176512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,65536,0.08357760111490885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,10240,0.010630399982134501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,10240,0.019363200664520262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,16384,0.025121066967646283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,8192,0.009379200140635173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,8192,0.01775040030479431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,12288,0.01993066668510437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,7168,0.008616532882054646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,7168,0.017965867122014364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,6144,0.007928533355395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,6144,0.017667200167973837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,10240,0.017340799172719322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,4096,0.006420266628265381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,5120,0.007445333401362102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,8192,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,5120,0.018206934134165444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,4096,0.017357865969340004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,7168,0.01359999974568685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,3072,0.00614933321873347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,3584,0.0067552000284194945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,3584,0.01699413259824117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,6144,0.011789866288503011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,3072,0.016872533162434897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,2560,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,5120,0.01053653359413147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,2560,0.016340266664822897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,2048,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,4096,0.009390933314959209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,2048,0.01604586640993754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,3584,0.008933333555857341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,1536,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,1536,0.015422933300336204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,3072,0.008131200075149536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,768,0.0037717332442601522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,1024,0.003999999910593033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,1024,0.01527466674645742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,768,0.014776532848676046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,2560,0.00766293356815974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,512,0.003357866654793421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,512,0.01498133341471354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,256,0.0030965333183606463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,2048,0.007018666466077168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,256,0.014643200238545737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,1536,0.006503466765085857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,768,0.005539200206597646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,128,0.0028757333755493166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,1024,0.005704533557097117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,512,0.005109333495299021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,128,0.014550399780273438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,64,0.0027082666754722597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,64,0.014482133587201438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,768,32,0.00277866671482722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,256,0.004906666775544485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,768,32,0.014430933197339377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,65536,0.028467200199762982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,65536,0.03065813382466634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,768,128,0.004786133269468943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,16384,0.010647466778755188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,16384,0.02004479964574178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,12288,0.009315199653307597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,12288,0.01775253415107727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,10240,0.00881599982579549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,10240,0.018258132537206016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,8192,0.007646933197975159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,8192,0.01771946748097738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,65536,0.08492693106333414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,7168,0.006878933310508728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,7168,0.01816426714261373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,6144,0.006371200084686279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,16384,0.02476480007171631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,6144,0.01790720025698344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,12288,0.019547732671101888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,5120,0.006613333523273468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,5120,0.018178133169809978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,10240,0.017027199268341064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,7168,0.012981333335240684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,4096,0.005942399799823761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,8192,0.014104533195495605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,4096,0.017947733402252197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,3584,0.006567466755708058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,6144,0.011798399686813354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,3584,0.01692053278287252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,3072,0.006260266900062561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,5120,0.010442666212717692
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,3072,0.01665386656920115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,2560,0.00622506688038508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,2560,0.016124799847602844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,4096,0.009226666887601216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,1536,0.0046410664916038515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,2048,0.0053727999329566956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,2048,0.015910399953524272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,1536,0.015661866466204325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,3584,0.009000533819198608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,1024,0.0040853333969910945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,1024,0.015255467096964518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,768,0.003640533238649368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,768,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,512,0.003336533407370249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,512,0.01493013302485148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,3072,0.008125866452852886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,1536,0.006534400085608165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,256,0.003028266628583272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,2560,0.007714133461316426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,2048,0.006838400165239971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,256,0.01458133359750112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,128,0.0029120000700155893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,768,0.0053610667586326596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,1024,0.005676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,128,0.014345600207646688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,512,0.005095466474692027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,256,0.004891733328501383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,64,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,64,0.014545067151387533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,512,32,0.002721066772937775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,512,128,0.004730666677157084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,512,32,0.014318933089574179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,12288,0.0065749332308769224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,65536,0.017992534240086875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,65536,0.0244159996509552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,16384,0.007650133470694225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,16384,0.01816533406575521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,12288,0.017965867122014364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,10240,0.006363733112812043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,10240,0.018337066968282065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,8192,0.006218666831652323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,8192,0.0172650674978892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,7168,0.006053333481152853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,7168,0.01825493375460307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,6144,0.005977599819501241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,6144,0.01772586703300476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,65536,0.08314452966054281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,16384,0.024332799514134727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,12288,0.018977065881093345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,5120,0.0063360000650088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,4096,0.00598826656738917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,5120,0.01800533334414164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,4096,0.017425066232681273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,10240,0.01649493376413981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,8192,0.014186666409174601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,7168,0.013008000453313193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,3584,0.006369066735108693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,3584,0.016945066054662068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,3072,0.0060917332768440245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,3072,0.0167797327041626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,2560,0.0059456000725428265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,2560,0.016005333264668783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,2048,0.005323733389377594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,2048,0.015752533078193666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,6144,0.011702400445938111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,1536,0.004632533093293508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,1536,0.015522133310635886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,5120,0.01072106659412384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,1024,0.004046933352947235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,1024,0.015080533425013223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,768,0.0036778666079044344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,768,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,4096,0.009265066186587016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,512,0.0033546666304270422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,512,0.014866133530934652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,3072,0.008106666803359985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,256,0.002932266642649968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,256,0.014402133226394654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,3584,0.008870399991671244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,128,0.0028490667541821797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,128,0.014333867033322654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,2560,0.007619200150171916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,2048,0.0068341334660847975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,64,0.002721066772937775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,256,32,0.00271573339899381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,64,0.014377599954605103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,256,32,0.014261333147684732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,65536,0.011869866649309795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,65536,0.020884267489115396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,16384,0.0060479998588562015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,16384,0.017837866147359212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,1024,0.00565226674079895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,1536,0.006393600006898243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,12288,0.006239999830722809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,768,0.005414400001366933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,12288,0.017488000790278117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,512,0.005267199873924255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,256,0.004901333153247834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,256,128,0.004748799900213877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,10240,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,10240,0.017961599429448447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,8192,0.006105599800745646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,8192,0.017451733350753784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,65536,0.0829482634862264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,16384,0.023729066054026283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,7168,0.005914666752020518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,7168,0.017709867159525553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,6144,0.005745066702365876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,12288,0.019262933731079103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,6144,0.017778132359186807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,5120,0.006054399907588959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,5120,0.0176256000995636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,4096,0.005835733314355215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,4096,0.017027199268341064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,3584,0.00624533345301946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,3584,0.016713599363962807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,10240,0.01658453345298767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,3072,0.005797333518664042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,3072,0.017035732666651406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,2560,0.005782400071620941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,2560,0.016582399606704712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,8192,0.014105600118637086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,2048,0.0051701332132021586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,2048,0.015715199708938598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,7168,0.012937600413958231
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,1536,0.004531200230121613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,1536,0.015372799833615622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,6144,0.011793067057927448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,1024,0.003967999915281932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,5120,0.010572800040245056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,1024,0.015144532918930054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,768,0.00365226666132609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,768,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,4096,0.009160533547401428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,512,0.0032426667710145317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,512,0.014604799946149192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,3072,0.008120533327261608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,256,0.002880000074704488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,3584,0.008888533711433411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,256,0.014503467082977294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,128,0.002738133321205775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,128,0.014316800236701965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,2048,0.006851199766000111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,2560,0.007580799857775371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,64,0.002643200010061264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,64,0.01458133359750112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,128,32,0.0026208000878492994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,128,32,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,65536,0.008778666456540424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,16384,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,65536,0.019988266626993816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,16384,0.018181333939234413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,12288,0.005990399916966756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,1536,0.0063967997829119366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,12288,0.017387733856836955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,10240,0.006118399898211161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,768,0.005366399884223938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,1024,0.005761066575845083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,8192,0.005883733431498209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,10240,0.017581866184870402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,8192,0.01727786660194397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,512,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,256,0.004814933240413666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,16,128,128,0.004615466793378194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,7168,0.0057322666049003605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,7168,0.017851734161376955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,6144,0.005628799895445505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,6144,0.01743040084838867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,5120,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,5120,0.017684266964594523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,4096,0.0056202664971351625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,4096,0.016886399189631144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,3584,0.006087466577688853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,3584,0.016790399948755898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,3072,0.005769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,3072,0.016426666577657064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,2560,0.005645866692066193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,2560,0.016033066312472026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,2048,0.005065600077311197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,2048,0.015638400117556253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,1536,0.0044821331898371375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,1536,0.015321600437164306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,1024,0.0037685332198937735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,1024,0.015152000387509666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,768,0.00352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,768,0.014697600404421488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,512,0.0031968000034491217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,512,0.014604799946149192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,256,0.0029567999144395193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,256,0.014470400412877402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,128,0.0027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,64,0.0026229334374268847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,128,0.014364799857139588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,64,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,64,32,0.0026101333399613695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,64,32,0.014281599720319112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,65536,0.00788266658782959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,65536,0.021064533789952596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,16384,0.006108800073464712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,16384,0.018080000082651773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,12288,0.005861333509286245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,12288,0.01753173271814982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,10240,0.005921066800753275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,10240,0.017515732844670614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,8192,0.0058378666639328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,8192,0.017145599921544394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,7168,0.005745066702365876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,7168,0.017884800831476845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,6144,0.00561599979797999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,6144,0.017602133750915527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,5120,0.005948799848556519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,5120,0.01771093408266703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,4096,0.0055871998270352686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,4096,0.017296000321706136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,3584,0.00603413333495458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,3584,0.016849066813786825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,3072,0.0056970665852228795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,3072,0.01634773313999176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,2560,0.005670399963855743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,2560,0.015990400314331056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,2048,0.005042133231957754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,2048,0.015703466534614564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,1536,0.004439466694990794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,1536,0.015405866503715514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,1024,0.0038560000558694207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,1024,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,768,0.0035189333061377203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,768,0.014734933773676554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,512,0.003202133377393087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,512,0.014694399634997048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,256,0.002930133293072383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,32,0.014243200421333313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,256,0.014506666858990987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,128,0.002739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,128,0.014371200402577718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,64,0.002583466718594233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,16,32,64,0.014270933469136557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,16,32,32,0.002648533384005229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,16384,0.7481056213378906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,16384,0.39137067794799807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,12288,0.2986079851786295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,12288,0.5700650533040365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,10240,0.47314879099527996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,10240,0.25005440711975097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,7168,0.32970879872639974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,8192,0.37942721048990885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,8192,0.20698986053466797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,16384,0.3552821477254232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,12288,0.2679690678914388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,10240,0.2623701254526774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,7168,0.18048639297485353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,6144,0.2847669283548991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,6144,0.15699092547098797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,8192,0.1985589345296224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,5120,0.23698026339213052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,7168,0.15872213045756023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,6144,0.1373311996459961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,5120,0.1335360050201416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,4096,0.10931200186411541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,4096,0.1920095920562744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,3584,0.16759360631306966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,3584,0.10315732955932617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,5120,0.1155402660369873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,3072,0.14469973246256512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,3072,0.08655040264129639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,4096,0.09366827011108399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,2560,0.12094826698303222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,3584,0.08815466562906901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,2560,0.07506559689839681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,2048,0.09696640173594157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,3072,0.0827349344889323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,2048,0.06285013357798258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,1536,0.07445759773254394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,2560,0.06153279940287272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,1536,0.053741868336995444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,2048,0.051747198899586996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,1024,0.0506880005200704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,1024,0.03944106499354045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,768,0.04011840025583903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,1536,0.03974506855010986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,768,0.03381119966506958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,1024,0.028729599714279175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,512,0.027266132831573486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,512,0.027401600281397504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,768,0.022992000977198283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,256,0.0153546671072642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,256,0.02099519968032837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,512,0.01696746746699015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,128,0.009009066224098205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,256,0.012160000205039979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,128,0.017874133586883546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,64,0.006311466793219249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,64,0.016088533401489257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,65536,128,0.009657599528630574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,65536,32,0.004538666705290476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,65536,32,0.01634773313999176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,65536,0.7400010426839192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,65536,0.3816565195719401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,16384,0.18484479586283367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,16384,0.1071349302927653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,12288,0.14049280484517415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,12288,0.08492159843444824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,65536,0.35778347651163733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,16384,0.09408640066782634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,10240,0.12793280283610026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,10240,0.07285226980845133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,10240,0.06091093222300211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,7168,0.05752213398615519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,8192,0.09435199896494548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,8192,0.061592535177866614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,12288,0.0842357317606608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,8192,0.05793600082397461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,7168,0.08293866316477458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,6144,0.07260800202687581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,6144,0.050213332970937094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,7168,0.044361599286397296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,5120,0.0604149341583252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,5120,0.04407039880752563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,4096,0.04917226632436116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,4096,0.03826453288396199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,6144,0.039018666744232176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,3584,0.043391998608907065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,3584,0.035702399412790936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,5120,0.03344000180562337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,3072,0.03771200180053711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,3072,0.032758400837580366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,2560,0.03199359973271688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,2560,0.029821866750717164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,2048,0.02648640076319377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,4096,0.027906133731206255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,3584,0.025430399179458617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,2048,0.026121600468953447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,1536,0.02064746618270874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,1536,0.023253333568573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,3072,0.022409600019454957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,2560,0.019796266158421835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,2048,0.016427733500798545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,1024,0.01665493349234263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,1024,0.0206496000289917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,768,0.011594667037328085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,1536,0.013579733173052468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,768,0.019130667050679527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,512,0.00858026643594106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,512,0.016722132762273155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,128,0.003403733422358831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,1024,0.010505599776903789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,256,0.005864533285299936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,256,0.015081600348154704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,768,0.009276800354321798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,128,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,128,0.005031466484069824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,64,0.0032149332265059153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,32,0.01495253344376882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,16384,64,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,16384,32,0.0031509332358837126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,512,0.007625600198904674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,16384,0.14549120267232257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,65536,0.293557325998942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,65536,0.5607018788655599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,16384,0.08749226729075113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,16384,256,0.0053162669142087305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,12288,0.11088000138600666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,12288,0.06767146587371826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,65536,0.28437439600626624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,10240,0.09276800155639649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,8192,0.05527253150939941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,16384,0.07505173683166504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,10240,0.058974933624267575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,8192,0.07595307032267253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,12288,0.06665493249893188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,6144,0.04152106841405233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,7168,0.03615146478017171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,7168,0.06666560173034668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,7168,0.04557226498921712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,10240,0.04862506786982219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,6144,0.05419306755065918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,8192,0.04017706712086995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,5120,0.04594133297602336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,5120,0.036761601765950516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,4096,0.03774506648381551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,4096,0.032451200485229495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,3584,0.0325984001159668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,6144,0.03163839975992839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,3584,0.030502400795618695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,5120,0.027332266171773274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,3072,0.029054933786392213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,4096,0.02299413283665975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,3072,0.02810346682866414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,3584,0.02125119964281718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,2560,0.02444266676902771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,2560,0.02569173375765483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,3072,0.018669867515563966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,2048,0.020300799608230592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,2048,0.023422932624816893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,2560,0.016662399967511496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,1536,0.015991466244061787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,1536,0.02131519913673401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,2048,0.013857066631317139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,1024,0.011725866794586181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,1024,0.01946880022684733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,1536,0.011717333396275838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,768,0.009960533181826273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,1024,0.00929813285668691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,768,0.01826453407605489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,512,0.00761706680059433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,512,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,256,0.004007466634114583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,256,0.015121066570281982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,768,0.008125866452852886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,256,0.005245866874853769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,128,0.003385599950949351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,512,0.005613866448402405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,128,0.014922666549682616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,64,0.00322026660044988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,64,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,12288,32,0.0031818665564060213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,12288,128,0.0050016000866889955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,12288,32,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,65536,0.2528906663258871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,65536,0.47723948160807295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,65536,0.23803946177164712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,16384,0.12422400315602619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,16384,0.07637973626454672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,12288,0.09483306407928467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,12288,0.05976106723149618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,12288,0.05667093197504679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,16384,0.06449919939041138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,10240,0.09385279814402261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,10240,0.05486933390299479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,8192,0.06484586795171102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,8192,0.04453013340632121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,10240,0.04112533330917358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,7168,0.05367786486943563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,7168,0.04059946537017822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,7168,0.03172159989674886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,6144,0.0470197319984436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,8192,0.0352234681447347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,6144,0.037123199303944907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,5120,0.04087893168131511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,5120,0.033854933579762776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,5120,0.023998934030532836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,6144,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,4096,0.031908265749613446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,4096,0.02972053289413452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,4096,0.020348799228668214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,3584,0.028465066353480024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,3584,0.028221867481867474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,3584,0.018835200866063436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,3072,0.024939733743667602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,3072,0.02569920023282369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,2560,0.02113173405329386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,2560,0.023630932966868082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,1536,0.020541866620381675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,2048,0.017898666858673095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,3072,0.01637333333492279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,2048,0.02201706568400065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,2560,0.014662399888038635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,1536,0.01383786698182424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,2048,0.012542933225631714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,1024,0.010038399696350097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,1024,0.01853546698888143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,768,0.00846613347530365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,768,0.016746666034062704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,1536,0.010785067081451416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,512,0.006680533289909363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,1024,0.009410132964452107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,128,0.0032778667906920114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,512,0.015264000495274863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,768,0.0067071999112765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,128,0.01472106675306956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,256,0.003538133452335993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,256,0.014813866217931113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,512,0.00555733342965444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,64,0.0030303999781608583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,10240,32,0.0029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,64,0.014568533500035605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,10240,32,0.014731733004252115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,256,0.0051818668842315676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,65536,0.20101440747578941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,65536,0.37399466832478845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,16384,0.0924618641535441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,10240,128,0.004891733328501383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,16384,0.060378666718800864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,12288,0.07032426993052164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,65536,0.1843466599782308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,12288,0.05254826545715332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,16384,0.050988801320393885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,10240,0.05914773146311442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,10240,0.04411413272221883
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,8192,0.04793066581090291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,8192,0.04149653514226277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,7168,0.04240106741587321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,12288,0.04589866797129313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,7168,0.03531519969304402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,10240,0.034151466687520345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,6144,0.03729386727015178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,6144,0.0325653334458669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,8192,0.02850666642189026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,5120,0.0314410666624705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,7168,0.025539199511210125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,5120,0.02955946723620097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,4096,0.02573973337809245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,6144,0.022900267442067464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,4096,0.02638186613718669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,3584,0.022907733917236328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,3584,0.024962133169174193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,3584,0.015566933155059814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,3072,0.020252799987792967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,5120,0.01997013290723165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,3072,0.023515733083089192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,4096,0.01741973360379537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,2048,0.02031360069910685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,2560,0.01726933320363363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,3072,0.013953066865603127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,2560,0.022013866901397706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,2048,0.014114133516947427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,1536,0.011478400230407715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,1536,0.019062399864196777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,1024,0.008594133456548055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,1024,0.017435733477274576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,768,0.007283199826876323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,768,0.014990933736165366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,2560,0.01251200040181478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,512,0.005783466498057047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,2048,0.010985599954922994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,1536,0.009852799773216247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,512,0.015052800377209982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,256,0.003458133339881897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,256,0.014965333541234336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,1024,0.007515733440717061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,768,0.005684266487757364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,128,0.0031669333577156065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,512,0.005418666700522105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,256,0.0051146666208903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,128,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,64,0.0030346666773160298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,8192,32,0.0030165334542592366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,64,0.014566399653752646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,8192,32,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,65536,0.18432854016621908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,65536,0.32700907389322914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,65536,0.1637152036031087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,16384,0.0902997334798177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,16384,0.05758613348007202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,8192,128,0.004902400076389313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,12288,0.06619626681009928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,12288,0.047958401838938396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,10240,0.054973868529001865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,16384,0.04573226769765218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,10240,0.04187306563059489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,8192,0.0458901325861613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,8192,0.035545599460601804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,7168,0.04014613231023152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,7168,0.033160533507665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,12288,0.04124053319295247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,6144,0.035922133922576906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,10240,0.030819199482599896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,8192,0.025547732909520466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,6144,0.03066986600557963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,5120,0.030264532566070555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,5120,0.028051199515660603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,7168,0.0233514666557312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,4096,0.02336853345235189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,4096,0.025094399849573772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,6144,0.020840533574422202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,5120,0.018374399344126383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,3584,0.020801067352294922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,3584,0.024667733907699586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,4096,0.015447466572125753
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,3072,0.018321067094802856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,3584,0.014169599612553915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,3072,0.02286613384882609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,2560,0.01569919983545939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,2560,0.021093332767486574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,3072,0.012689066926638284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,2048,0.013210666179656983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,2048,0.019986132780710854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,2560,0.011645866433779399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,1536,0.010711466272672016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,1536,0.01884053349494934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,1024,0.008080000181992848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,1024,0.01539520025253296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,768,0.006899199883143107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,2048,0.010236799716949463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,768,0.015131733814875283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,1536,0.009283199906349182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,512,0.0043594668308893835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,512,0.014972800016403198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,1024,0.0065994665026664736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,256,0.0033813332517941795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,256,0.014653866489728292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,64,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,128,0.0030026666820049284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,128,0.014524799585342408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,7168,32,0.0028714666763941447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,64,0.014491732915242514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,768,0.005826133489608765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,7168,32,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,512,0.005532800157864889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,65536,0.280515193939209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,256,0.005100800096988678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,65536,0.15709120432535809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,7168,128,0.00487253318230311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,16384,0.0754858652750651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,16384,0.05192213455835978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,16384,0.04074560006459554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,65536,0.14300373395284016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,12288,0.06702933311462403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,12288,0.04246826569239299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,10240,0.04965546528498332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,10240,0.037855998675028486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,8192,0.03903146584828694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,8192,0.03364693323771159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,7168,0.03407680193583171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,7168,0.03174826701482137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,12288,0.03582613468170166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,10240,0.027731200059254963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,6144,0.02983466585477193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,6144,0.029072000583012896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,8192,0.022954666614532472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,5120,0.02524799903233846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,5120,0.02675093412399292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,4096,0.021399466196695964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,4096,0.02430400053660075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,4096,0.013896532853444419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,7168,0.021064533789952596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,6144,0.01882666746775309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,3584,0.019205333789189656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,5120,0.016407466928164163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,3584,0.02321066657702128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,3584,0.01291306714216868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,2048,0.012349866827329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,3072,0.016679465770721436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,3072,0.02177706758181254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,2560,0.014517333110173544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,2560,0.020434133211771646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,2048,0.019172267119089762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,3072,0.011669333775838215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,1536,0.009913600484530131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,1536,0.01802026629447937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,1024,0.007567999760309856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,1024,0.015431466698646545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,2560,0.011000532905260723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,2048,0.00955839951833089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,768,0.006388266881306966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,1536,0.008740267157554627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,768,0.015017599860827128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,1024,0.005985066791375478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,512,0.003681066632270813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,512,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,768,0.005548800031344095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,256,0.0032799998919169106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,512,0.005456000069777171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,256,0.014774399995803832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,128,0.0030794667700926462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,128,0.014428800344467163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,64,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,64,0.014512000481287637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,6144,32,0.0028970666229724885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,6144,32,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,256,0.005032533407211303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,65536,0.23938345909118652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,65536,0.13762027422587078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,6144,128,0.004806399842103322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,16384,0.06354879935582479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,16384,0.045516800880432126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,12288,0.056017065048217775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,12288,0.037299199899037676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,65536,0.1240949312845866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,10240,0.041460267702738446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,10240,0.03344853321711223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,8192,0.033725865681966144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,8192,0.029711999495824176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,16384,0.035370667775472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,7168,0.02797546585400899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,12288,0.028360533714294433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,7168,0.02768213351567586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,10240,0.024650667111078897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,6144,0.02474986712137858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,6144,0.025969066222508747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,8192,0.022004266579945884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,7168,0.01890773375829061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,5120,0.021118932962417604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,5120,0.02408000032107035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,4096,0.017494400342305504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,6144,0.016691199938456216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,3072,0.013875200351079305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,4096,0.02222613294919332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,3072,0.010875733693440755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,5120,0.014693333705266317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,3584,0.015786666671435037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,3584,0.021348265806833903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,3072,0.0207370658715566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,2560,0.012078932921091716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,2560,0.01956693331400553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,2560,0.010238933563232421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,2048,0.010171733299891154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,2048,0.01851200064023336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,1536,0.008590933680534363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,1536,0.01685439944267273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,1024,0.015213867028554281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,4096,0.012545067071914672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,1024,0.00665280024210612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,768,0.005143466591835022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,3584,0.012078932921091716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,768,0.015103999773661295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,768,0.005614933371543884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,512,0.0035904000202814737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,2048,0.008955732981363932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,512,0.014889599879582724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,1536,0.007646933197975159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,1024,0.0061482667922973635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,256,0.003186133255561193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,256,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,256,0.005129600067933401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,128,0.0029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,128,0.014446933070818582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,512,0.005330133438110352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,5120,128,0.00489279975493749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,64,0.0028181334336598715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,64,0.014512000481287637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,5120,32,0.0028938665986061097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,5120,32,0.014340266585350037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,65536,0.18688106536865234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,65536,0.10960746606190999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,16384,0.049779200553894044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,16384,0.03898133436838786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,12288,0.03935466607411702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,10240,0.033090132474899295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,12288,0.03353493213653565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,10240,0.030446932713190718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,8192,0.027427200476328534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,8192,0.026899200677871705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,7168,0.024647466341654458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,7168,0.025421865781148273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,65536,0.10663466453552246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,16384,0.030826665957768756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,6144,0.022046933571497597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,6144,0.024022400379180908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,5120,0.01918720006942749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,12288,0.02842880090077718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,10240,0.021631999810536703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,8192,0.018141865730285645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,5120,0.022336000204086305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,7168,0.016759467124938966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,4096,0.015824000040690102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,6144,0.015201066931088766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,4096,0.02091946601867676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,3584,0.013064533472061157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,5120,0.01325759987036387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,3584,0.0202293336391449
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,4096,0.0115146666765213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,3072,0.01170133352279663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,3072,0.01949013272921244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,2560,0.010179199775060018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,2560,0.01864746610323588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,2048,0.00876693328221639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,3584,0.01111893355846405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,2048,0.01774719953536987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,1536,0.007518933216730754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,1536,0.016039466857910155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,1024,0.015380266308784484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,3072,0.010172800223032633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,1024,0.005983999868233999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,2560,0.009732266267140705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,768,0.0038250667353471124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,2048,0.00787199983994166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,768,0.015076266725858054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,768,0.005589333176612854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,512,0.0035487999518712364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,1536,0.006909866631031036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,512,0.01490239997704824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,256,0.003110400090614955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,1024,0.005793066819508871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,256,0.01467626690864563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,128,0.00290133332212766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,128,0.014417066176732381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,64,0.0027424000203609467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,512,0.005297066768010458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,64,0.014545067151387533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,4096,32,0.0027242665489514667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,256,0.00492799977461497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,4096,32,0.014579199751218162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,65536,0.1686250686645508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,65536,0.10147626399993896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,16384,0.04786026477813721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,4096,128,0.004686933259169261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,16384,0.03596373399098714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,16384,0.0299615999062856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,65536,0.10260053475697835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,12288,0.03548479874928792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,12288,0.032077866792678836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,10240,0.030613332986831665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,10240,0.028759467601776122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,12288,0.023937066396077476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,8192,0.024791467189788818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,8192,0.025355732440948485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,7168,0.02249493400255839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,7168,0.024251733223597208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,10240,0.02095573345820109
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,8192,0.016746666034062704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,7168,0.016198399662971496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,6144,0.019771732886632285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,6144,0.02262186606725057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,5120,0.017271467049916587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,5120,0.021673599878946938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,6144,0.014487466216087342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,4096,0.013369599978129068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,4096,0.020227199792861937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,3584,0.012005333105723064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,3584,0.01976213256518046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,4096,0.011455999811490376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,3072,0.010819199681282043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,5120,0.013062399625778199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,3072,0.019180800517400107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,2560,0.00956053336461385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,2560,0.018346667289733887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,2048,0.008340266346931458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,3584,0.010594133536020916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,2048,0.01611840029557546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,3072,0.009980799754460652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,2560,0.009265066186587016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,1536,0.007063466807206471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,1536,0.01584106683731079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,1024,0.004649599889914194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,2048,0.007253333429495494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,1024,0.015287466843922935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,768,0.0038122666378815973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,768,0.015154133240381876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,1536,0.00680213322242101
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,512,0.00352960005402565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,512,0.014918399850527444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,1024,0.006117333471775055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,768,0.005558399856090546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,256,0.003197866678237915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,256,0.014450132846832275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,512,0.005363200108210246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,128,0.0029386666913827257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,128,0.014538666605949402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,64,0.0026591998835404714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,256,0.005005866785844167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,64,0.01444586714108785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3584,32,0.0027615999182065325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3584,32,0.014409599701563516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,65536,0.1453269322713216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3584,128,0.004823466638724009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,65536,0.0886624018351237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,16384,0.040009601910909014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,16384,0.03351893424987793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,12288,0.031729066371917726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,12288,0.03026026686032613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,65536,0.09417706330617269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,10240,0.026255999008814496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,16384,0.027801599105199176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,10240,0.02680533329645793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,10240,0.019435733556747437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,8192,0.016544000307718913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,12288,0.022338134050369263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,8192,0.022131200631459555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,8192,0.02485439976056417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,7168,0.019912532965342202
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,7168,0.023613866170247397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,6144,0.017566933234532674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,7168,0.015188266833623251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,6144,0.013708800077438354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,6144,0.022392533222834268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,5120,0.015480533242225647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,5120,0.020804266134897866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,5120,0.012533332904179892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,4096,0.013676800330479941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,4096,0.020163200298945107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,3584,0.010217600067456563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,4096,0.011028266946474711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,3584,0.012567466497421265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,3584,0.018925867478052773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,3072,0.011322666207949321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,3072,0.018576000134150186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,2560,0.00888213316599528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,2560,0.017768534024556477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,2048,0.007699200014273326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,2048,0.01623040040334066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,3072,0.009530666470527648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,2560,0.008671999971071879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,1536,0.0067562664548556015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,1536,0.015610667069753012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,2048,0.007005866865317028
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,1024,0.004299733539422353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,1024,0.015155200163523355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,1536,0.00661653329928716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,768,0.003789866715669632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,768,0.01502826710542043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,1024,0.0059125334024429325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,512,0.0034954667091369627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,512,0.014862933754920959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,768,0.005430399874846141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,128,0.014428800344467163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,256,0.0031583999594052637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,256,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,32,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,512,0.005206400156021118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,32,0.014469333489735923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,65536,0.1209450642267863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,128,0.0028778667251269023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,3072,64,0.0027317332724730173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,256,0.005116799970467886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,3072,64,0.01452906628449758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,3072,128,0.004726399978001913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,65536,0.07642026742299399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,16384,0.034399998188018796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,65536,0.09270186424255371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,16384,0.03054506580034892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,12288,0.026294400294621784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,12288,0.027213867505391436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,16384,0.02747093240420024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,10240,0.02279040018717448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,12288,0.02190613349278768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,10240,0.024663466215133666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,8192,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,8192,0.022780799865722658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,10240,0.01904639999071757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,7168,0.017369600137074788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,7168,0.021882667144139608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,7168,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,6144,0.015421866377194723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,6144,0.02102933327356974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,8192,0.015348266561826071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,6144,0.013644799590110779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,5120,0.013651200135548911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,5120,0.020218666394551596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,4096,0.011588266491889954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,4096,0.019156267245610557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,5120,0.012284800410270691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,3584,0.010867200295130412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,3584,0.018605866034825645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,3584,0.010219732920328777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,4096,0.0107722669839859
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,3072,0.010185600320498148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,3072,0.016685867309570314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,2560,0.008289066453774769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,2560,0.016874667008717856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,3072,0.008893866340319316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,2048,0.007229866584142049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,2048,0.01600213348865509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,2560,0.008449066678682964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,1536,0.005499733487764994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,1536,0.015568000078201295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,1024,0.004109866668780645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,2048,0.007036800185839335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,1024,0.015074132879575094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,768,0.0037952000896135964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,1536,0.006504533191521962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,768,0.015065600474675497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,512,0.0034954667091369627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,1024,0.005901866654555003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,512,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,512,0.005288533369700114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,256,0.0030858665704727173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,64,0.002721066772937775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,256,0.014621866742769876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,128,0.0029109333952267963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,128,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,768,0.005538133283456167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,64,0.014465066790580749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2560,32,0.0027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,256,0.005011199911435445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2560,32,0.014521599809328715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2560,128,0.004722133278846741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,65536,0.0966858704884847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,65536,0.06287039915720621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,16384,0.027268266677856444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,16384,0.02695573369661967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,65536,0.08509973684946695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,12288,0.021783467133839926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,12288,0.024078933397928874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,10240,0.019093332688013713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,10240,0.022826667626698813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,16384,0.025195733706156416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,8192,0.01566506624221802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,8192,0.021290665864944457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,8192,0.01535146633783976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,6144,0.01994453271230062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,7168,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,7168,0.02058239976565043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,12288,0.0210698664188385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,10240,0.017484800020853678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,6144,0.012915199995040894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,5120,0.011768533786137899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,5120,0.019403733809789023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,3584,0.00949013332525889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,4096,0.010214400291442872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,7168,0.014110933740933737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,4096,0.018627200524012247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,4096,0.010353066523869832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,3584,0.016979199647903443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,3072,0.008880000313123066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,6144,0.012871467073758445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,3072,0.016266666849454246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,5120,0.011727999647458394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,2560,0.00755626658598582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,2048,0.015915733575820924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,2560,0.01618559956550598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,3584,0.00937066674232483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,3072,0.00841813286145528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,2048,0.006548266609509785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,1536,0.004681600133577982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,2560,0.007849599917729695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,1536,0.015503999590873719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,1536,0.006423466900984447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,2048,0.006941866874694824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,1024,0.004117333392302195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,1024,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,768,0.0036960000793139136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,768,0.014984533190727234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,1024,0.005678933362166087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,512,0.0033226666351159418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,512,0.014817066987355552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,768,0.005341866612434387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,256,0.003047466774781545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,256,0.00484799991051356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,256,0.014725333452224732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,128,0.002791466563940048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,128,0.01458026667435964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,512,0.005107200145721436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,64,0.002700799951950709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,2048,32,0.0026464000344276427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,64,0.014376533031463624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,2048,32,0.014538666605949402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,65536,0.074507737159729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,2048,128,0.004717866579691568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,65536,0.052730667591094973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,16384,0.02215786576271057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,16384,0.024206932385762533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,65536,0.08349973360697428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,12288,0.01800959904988607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,12288,0.022449066241582237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,10240,0.015521066387494406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,10240,0.021093332767486574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,16384,0.024604799350102742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,8192,0.013166933258374532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,8192,0.020296533902486168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,12288,0.02002133329709371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,6144,0.019130667050679527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,7168,0.012172800302505494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,10240,0.01742080052693685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,7168,0.01957226594289144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,6144,0.011126400033632914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,8192,0.015041066209475198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,5120,0.01014293332894643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,5120,0.01954560081164042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,4096,0.009012266993522644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,4096,0.0174453337987264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,7168,0.013863466183344521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,3584,0.00855573316415151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,6144,0.012607999642690024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,5120,0.010985599954922994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,3584,0.016555733482042947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,4096,0.009408000111579894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,3072,0.007974400122960409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,3072,0.016504533092180886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,3584,0.009018666545550029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,2560,0.0068351998925209045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,3072,0.008111999928951263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,2560,0.016176000237464905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,2048,0.005389866729577383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,2048,0.015920000274976094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,1536,0.004699733356634776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,1536,0.01552959978580475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,1536,0.006403199831644694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,1024,0.004080000023047129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,1024,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,2560,0.00769706666469574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,768,0.0037130666275819145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,768,0.01502293348312378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,2048,0.006855466465155284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,512,0.0034474665919939675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,512,0.014731733004252115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,1024,0.005729066828886667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,256,0.002996266633272171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,256,0.014591999848683677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,768,0.005458133419354757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,128,0.002865066627661387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,512,0.005128533144791921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,128,0.014284800489743552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,256,0.004820266862710317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,64,0.0026389333109060925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,64,0.014432000120480857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1536,128,0.004830933113892873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1536,32,0.002661333233118057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1536,32,0.014525866508483887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,65536,0.04979306856791178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,65536,0.040277334054311116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,16384,0.015753600001335143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,65536,0.0833685318628947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,16384,0.021505065759023032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,10240,0.019631999731063842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,12288,0.013337600231170654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,12288,0.020090667406717937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,16384,0.024690133333206177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,10240,0.011972266435623168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,8192,0.010446932911872864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,8192,0.018822399775187175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,12288,0.01977919936180115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,7168,0.009797333677609762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,10240,0.017262933651606242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,7168,0.018077866236368815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,6144,0.008712533116340637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,8192,0.014876799782117209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,6144,0.01774719953536987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,7168,0.013112533092498779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,5120,0.008389332890510559
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,5120,0.017986132701237997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,4096,0.00740479975938797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,6144,0.011691733201344808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,4096,0.01729066570599874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,5120,0.010513066252072652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,3584,0.006965333223342895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,3584,0.01707306702931722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,3072,0.006415999929110209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,2560,0.0075914666056633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,4096,0.009290666381518046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,3584,0.008869333068529765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,3072,0.016772266228993735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,1536,0.004759466648101807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,2560,0.00610453337430954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,2560,0.01601599951585134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,2048,0.005308799942334493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,2048,0.01602026621500651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,3072,0.008074666559696197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,1536,0.015613866845766702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,2048,0.006930133203665416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,1024,0.004109866668780645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,1536,0.006409599880377452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,1024,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,768,0.0036447999378045404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,768,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,1024,0.005747200051943461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,512,0.0033546666304270422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,512,0.01480959951877594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,768,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,256,0.002940800040960312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,256,0.014613333344459533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,128,0.002867199977238973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,64,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,128,0.01446399986743927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,512,0.005118933320045471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,64,0.0026880001028378804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,256,0.004962133367856344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,1024,32,0.002690133452415466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,1024,32,0.014326399564743042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,65536,0.0390122652053833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,1024,128,0.004727466901143392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,65536,0.03536213239034017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,65536,0.08323840300242105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,16384,0.013156267007191977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,16384,0.020826667547225952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,12288,0.011320533355077107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,12288,0.019063466787338258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,16384,0.024590933322906496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,10240,0.010364799698193868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,10240,0.018207999070485432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,12288,0.01986453334490458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,8192,0.00923520028591156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,8192,0.017171200116475424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,7168,0.008569600184758504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,10240,0.01694613297780355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,7168,0.017854932943979898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,7168,0.013117866714795432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,8192,0.014266666769981385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,6144,0.007866666714350382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,6144,0.01795733372370402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,5120,0.007282133400440216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,5120,0.017937066157658894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,6144,0.011567999919255573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,4096,0.00626986672480901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,4096,0.017182934284210204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,5120,0.010523733496665955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,3584,0.006549333532651265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,3584,0.01704533298810323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,4096,0.009176533420880635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,3072,0.0060597335298856105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,3072,0.016426666577657064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,3584,0.008802133798599242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,2560,0.006043733159701029
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,2560,0.016359466314315795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,3072,0.008032000064849854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,2048,0.005355733136336008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,2048,0.015847466389338174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,1536,0.0046623999873797095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,2560,0.007653333246707916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,1536,0.01570026675860087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,2048,0.006914133330186208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,1024,0.003982933362325033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,1024,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,768,0.003671466559171677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,768,0.014946132898330688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,1536,0.006460799773534138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,1024,0.0057429333527882894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,512,0.0034314667185147605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,768,0.0053951998551686605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,512,0.01482133368651072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,256,0.003083733220895131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,512,0.005169066786766052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,256,0.01458560029665629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,128,0.0027701333165168762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,128,0.01458346645037333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,256,0.004924799998601278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,64,0.0026335999369621276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,768,128,0.004820266862710317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,64,0.01448853313922882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,768,32,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,768,32,0.014536533753077188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,65536,0.02768320043881734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,65536,0.028406399488449096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,16384,0.010296533505121868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,16384,0.019118932882944743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,65536,0.08359573682149252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,10240,0.016884267330169678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,12288,0.00918826659520467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,12288,0.01755946675936381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,16384,0.024029866854349772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,10240,0.00848426620165507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,12288,0.019151999553044637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,10240,0.017870932817459106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,8192,0.007585066556930542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,8192,0.017333332697550455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,7168,0.0067210664351781205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,7168,0.018322134017944337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,6144,0.006099199752012888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,8192,0.01404906709988912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,6144,0.017709867159525553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,7168,0.012896000345547994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,5120,0.0064533332983652755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,5120,0.018194133043289186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,6144,0.01165120005607605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,4096,0.005929600199063619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,4096,0.017267199357350667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,3072,0.016639999548594155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,5120,0.01037440001964569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,3584,0.006516266862551372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,4096,0.009181867043177288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,3584,0.017087999979654947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,3072,0.006097066899140676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,3584,0.00888213316599528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,2560,0.005974400043487549
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,2560,0.016100266575813295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,2048,0.005371733506520589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,2048,0.015962666273117064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,3072,0.00801386684179306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,1536,0.004729599754015604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,2560,0.007587199906508128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,1536,0.015624533096949259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,2048,0.006728533407052357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,1024,0.004033066580692927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,1024,0.015324800213177999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,1536,0.0064746667941411335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,768,0.003671466559171677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,768,0.015092266599337259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,1024,0.005717333157857259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,512,0.003387733300526937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,512,0.01471573313077291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,768,0.005256533126036326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,256,0.002979200085004171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,512,0.0050784001747767125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,256,0.014505599935849508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,256,0.004749866823355356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,128,0.0028597332537174227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,128,0.014396799604098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,512,128,0.0048096001148223875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,64,0.0026986666023731233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,64,0.014577066898345948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,512,32,0.0027583998938401537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,512,32,0.014371200402577718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,65536,0.016909867525100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,65536,0.02294293244679769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,16384,0.023829332987467446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,16384,0.007438933352629344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,65536,0.08284266789754233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,16384,0.017754666010538735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,12288,0.00626986672480901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,12288,0.01749546726544698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,10240,0.0062943999965985615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,10240,0.017909334103266398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,12288,0.01893226703008016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,8192,0.00613973339398702
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,8192,0.017422932386398315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,10240,0.016489600141843162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,7168,0.005982933441797892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,7168,0.017869865894317626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,8192,0.014099199573198953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,6144,0.005907199780146281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,6144,0.017391999562581383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,7168,0.012733866771062216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,4096,0.017288533846537273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,5120,0.0062730665008227035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,6144,0.011634133259455363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,5120,0.01798080007235209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,4096,0.0059008002281188965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,3584,0.006322133541107178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,3584,0.016939733425776163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,5120,0.010402133067448933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,2560,0.016731733083724977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,4096,0.009206400314966837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,3072,0.005949866771697998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,3072,0.016710400581359863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,3584,0.008728532989819845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,2560,0.005998933315277099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,2048,0.0054058666030565895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,3072,0.008021333316961924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,2048,0.015795200069745382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,2048,0.006741333504517872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,1536,0.00462719996770223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,2560,0.007639466722806294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,1536,0.015440000096956888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,1536,0.0063178668419520065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,1024,0.004067199925581614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,1024,0.0151829332113266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,768,0.003676799933115641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,768,0.014965333541234336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,768,0.005321600039800008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,512,0.0033258666594823206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,1024,0.0057429333527882894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,512,0.014725333452224732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,256,0.0030218665798505146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,512,0.005016533533732096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,256,0.014417066176732381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,32,0.002629333237806956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,256,0.004759466648101807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,128,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,128,0.014342400431632995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,256,64,0.0026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,64,0.014345600207646688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,256,128,0.004733866453170777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,256,32,0.014388266205787658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,12288,0.005809066692988077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,65536,0.011699199676513672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,65536,0.01991893251736959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,16384,0.005892266829808554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,16384,0.017735467354456583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,65536,0.08303146362304688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,16384,0.023826134204864503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,12288,0.01750613252321879
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,10240,0.006061866879463196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,12288,0.019013333320617675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,10240,0.017492266496022542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,10240,0.01638826628526052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,8192,0.005971199770768484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,8192,0.017153066396713258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,7168,0.0057781333724657696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,7168,0.017413334051767985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,8192,0.013948800166447959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,6144,0.005659733215967814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,7168,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,6144,0.017198934157689413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,6144,0.011554132898648579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,5120,0.0060032000144322716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,5120,0.017672532796859743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,5120,0.010375466942787171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,4096,0.005605333546797434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,4096,0.017244799931844076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,3584,0.006086400151252747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,3584,0.016620799899101257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,4096,0.009150933225949604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,3072,0.0056970665852228795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,2048,0.0051146666208903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,3072,0.016323199868202208
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,3584,0.008697600166002909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,3072,0.008010666569073994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,2560,0.005701333284378052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,2560,0.01593066652615865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,2048,0.015732266505559287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,1536,0.00450133333603541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,1024,0.00561706672112147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,2560,0.007558399935563405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,1536,0.015362133582433065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,2048,0.006782933572928111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,1024,0.0038954667747020722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,1024,0.015025066335995993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,1536,0.006384000182151794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,768,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,768,0.014922666549682616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,512,0.0031744000812371576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,768,0.005257600049177805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,512,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,256,0.0028789333999156954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,256,0.014497066537539164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,512,0.005096533397833506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,128,0.0027701333165168762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,128,0.014381866653760275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,64,0.0026176000634829206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,256,0.00481386681397756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,8,128,128,0.004635733366012573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,64,0.014398933450380961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,128,32,0.0026261332134405774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,128,32,0.014354133605957031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,65536,0.008541867136955261
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,65536,0.01889386574427287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,16384,0.005871999760468801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,16384,0.017541333039601644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,12288,0.005726933479309082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,12288,0.017514665921529136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,10240,0.005904000004132589
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,10240,0.01769066651662191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,8192,0.005894400179386139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,8192,0.017059199015299478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,7168,0.005746133128801982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,7168,0.017427200078964235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,6144,0.0055754666527112326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,6144,0.017132800817489625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,5120,0.005936000247796377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,5120,0.017372800906499227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,4096,0.005555200080076853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,4096,0.0172650674978892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,3584,0.006093866626421611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,3584,0.016545066237449647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,3072,0.005714133381843567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,3072,0.016084266702334087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,2560,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,2560,0.016132266322771708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,2048,0.005016533533732096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,2048,0.01569066643714905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,1536,0.004435199995835622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,1536,0.015361066659291586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,1024,0.003819733361403147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,1024,0.014843733112017313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,768,0.0035082665582497918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,768,0.014918399850527444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,512,0.003123199939727783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,512,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,256,0.002850133428970973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,256,0.014364799857139588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,128,0.0027413333455721537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,128,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,64,0.002600533266862233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,64,0.014500266313552857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,64,32,0.002550400048494339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,64,32,0.014215466380119324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,65536,0.007169066866238911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,65536,0.01931519905726115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,16384,0.005749333401521047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,16384,0.01755733291308085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,8192,0.016999467213948568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,12288,0.005736533304055532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,12288,0.01741546591122945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,10240,0.005866666634877523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,10240,0.017378133535385133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,8192,0.005749333401521047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,7168,0.005676800012588501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,7168,0.01741546591122945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,6144,0.0055744002262751256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,6144,0.017178666591644288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,5120,0.0059349333246548975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,5120,0.017458132902781167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,4096,0.005526400109132131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,4096,0.016759467124938966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,3584,0.005973333120346069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,2048,0.01551253298918406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,3584,0.016383999586105348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,3072,0.005655466516812643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,1024,0.0037471999724706015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,3072,0.016189866264661155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,2560,0.005667200187842051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,2560,0.016059733430544534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,2048,0.004972800115744273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,1536,0.004435199995835622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,1536,0.015109333395957946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,1024,0.014903466900189719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,768,0.0034272000193595886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,768,0.014804266889890037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,64,0.0025386666258176167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,512,0.0032373333970705668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,512,0.014564266800880432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,256,0.002850133428970973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,256,0.014475733041763306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,128,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,128,0.01409066617488861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,64,0.0143477330605189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,12288,0.5696896235148112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,8,32,32,0.0025920001169045764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,8,32,32,0.014398933450380961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,16384,0.7477301279703776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,16384,0.390887451171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,12288,0.2972341219584147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,10240,0.4754325230916341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,16384,0.3594719886779785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,10240,0.24922134081522623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,8192,0.38493226369222006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,12288,0.2712618509928385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,8192,0.20377066930135093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,6144,0.15594879786173504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,10240,0.22742080688476562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,7168,0.33091627756754555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,7168,0.17914026578267414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,8192,0.18128533363342286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,6144,0.2838399887084961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,7168,0.15874667167663575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,5120,0.2368384043375651
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,5120,0.13330026467641193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,6144,0.1374623934427897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,5120,0.13476266860961914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,4096,0.1914581298828125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,4096,0.10933226744333904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,3584,0.16711893081665039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,2560,0.1208352009455363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,3584,0.09821546872456868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,4096,0.1089290698369344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,3072,0.14423680305480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,3072,0.08668906688690185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,3584,0.0827455997467041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,2560,0.07448639869689941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,3072,0.08335999647776285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,2048,0.0966976006825765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,2048,0.06252906719843546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,1536,0.039162667592366536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,1024,0.03936426639556885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,2560,0.06134080092112223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,1536,0.07424000104268393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,2048,0.057538131872812905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,1536,0.0514847993850708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,512,0.02846933404604594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,1024,0.05034346580505371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,768,0.03957013289133708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,768,0.03365439971288045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,512,0.027219200134277345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,256,0.015320533514022827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,256,0.02095680038134257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,1024,0.02840106685956319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,768,0.025218133131663007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,128,0.00905386706193288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,512,0.01686720053354899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,128,0.01989013353983561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,128,0.009489066402117411
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,65536,256,0.012078932921091716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,64,0.00625493327776591
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,65536,32,0.004487466812133789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,64,0.018811732530593872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,65536,32,0.018871466318766274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,65536,0.7482175827026367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,65536,0.37707945505777996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,16384,0.1846336046854655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,10240,0.12818666299184162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,16384,0.10685333410898845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,16384,0.09347626368204752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,12288,0.14013439814249676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,65536,0.35531946818033855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,12288,0.08509120146433512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,10240,0.07274026870727539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,8192,0.09436053435007731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,8192,0.06118933359781901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,6144,0.07200106779734293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,12288,0.08402026494344075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,10240,0.06061760187149048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,6144,0.038696531454722086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,7168,0.09763733545939127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,7168,0.055719467004140225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,8192,0.04966080188751221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,6144,0.05000746647516886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,7168,0.044234665234883626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,5120,0.06028693517049154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,5120,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,4096,0.049073068300882976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,5120,0.0332586665948232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,4096,0.038483198483784994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,3584,0.04319893519083659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,3584,0.03562560081481934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,4096,0.02769920031229655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,3584,0.02513493299484253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,3072,0.03770666519800822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,3072,0.032577067613601685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,2560,0.03173440098762512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,2560,0.029469867547353108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,2048,0.025910399357477826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,3072,0.02230506738026937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,2048,0.026369067033131917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,2048,0.016446933150291443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,2560,0.019562667608261107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,1536,0.020411733786265054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,1536,0.02365760008494059
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,1536,0.01362666686375936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,1024,0.01443839967250824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,1024,0.020539732774098714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,768,0.011480533083279927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,1024,0.010643200079600016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,768,0.01920106609662374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,512,0.008678399523099263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,512,0.01625386675198873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,768,0.009262933333714803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,256,0.005885866781075796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,64,0.003147733211517334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,512,0.0073632001876831055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,256,0.015289599696795145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,128,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,128,0.01482133368651072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,256,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,64,0.014890666802724203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,16384,32,0.0031082667410373688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,16384,32,0.014915200074513755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,16384,128,0.004993066688378652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,65536,0.5582186381022136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,65536,0.2925514539082845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,16384,0.1438591957092285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,16384,0.08725120226542155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,12288,0.11002986431121826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,12288,0.0670026699701945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,10240,0.10474133491516113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,65536,0.2850602785746256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,16384,0.0763808012008667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,10240,0.0587658683458964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,8192,0.07511146863301596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,8192,0.04900053342183431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,12288,0.0671498696009318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,10240,0.04868799845377604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,7168,0.07729706764221192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,7168,0.045373864968617755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,8192,0.040343467394510904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,6144,0.03171413342158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,6144,0.055130668481191004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,6144,0.04208853244781494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,5120,0.048979198932647704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,5120,0.036600534121195474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,7168,0.0359007994333903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,4096,0.036907732486724854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,4096,0.03235519925753276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,5120,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,3584,0.03324906627337138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,3584,0.030614399909973146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,3072,0.028600533803304035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,3072,0.027986133098602296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,4096,0.02289173404375712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,2560,0.024590933322906496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,2560,0.025465599695841473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,2048,0.020252799987792967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,2048,0.0234826664129893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,3584,0.021014400323232017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,2560,0.01632533371448517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,1536,0.01586666703224182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,1536,0.021350399653116862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,3072,0.018580265839894614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,1024,0.011730133493741354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,2048,0.013645866513252258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,1024,0.01937920053799947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,1536,0.011496532956759136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,1024,0.009346133470535279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,768,0.009988266229629516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,768,0.018131200472513834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,512,0.007694933315118153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,512,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,768,0.008854400118192036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,512,0.005898666878541311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,256,0.0040853333969910945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,256,0.015293866395950317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,256,0.0052159999807675685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,128,0.00346666673819224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,128,0.014964266618092855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,12288,128,0.004875733455022176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,64,0.003223466624816259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,64,0.014908799529075622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,12288,32,0.0032127998769283296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,12288,32,0.014936533570289613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,65536,0.2492608070373535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,65536,0.4738922754923503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,16384,0.12409813404083252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,16384,0.07639146645863851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,12288,0.09407040278116861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,65536,0.23379200299580893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,16384,0.06310506661732992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,12288,0.06142400105794271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,10240,0.07884159882863363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,10240,0.05263359944025675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,8192,0.06417493422826132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,8192,0.04895573457082113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,12288,0.05748159885406494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,7168,0.053407998879750576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,10240,0.04239893356959025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,7168,0.04056959946950277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,8192,0.034168533484141034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,6144,0.04696640173594157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,7168,0.03093973398208618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,6144,0.037113598982493085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,5120,0.03925439914067586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,5120,0.033054933945337935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,5120,0.023825067281723022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,6144,0.027056000630060834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,4096,0.031845333178838094
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,4096,0.02919999957084656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,3584,0.02821333408355713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,3584,0.027280000845591228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,3072,0.024737066030502318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,3072,0.02547093431154887
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,4096,0.020216532548268638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,3584,0.018321067094802856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,2560,0.021050665775934854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,2560,0.023887999852498374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,3072,0.01646293302377065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,2048,0.017292799552281697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,2048,0.022021333376566567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,2048,0.012289067109425861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,2560,0.014805333813031516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,1536,0.013742933670679728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,1536,0.0200000007947286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,1024,0.010182399551073711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,1536,0.010636799534161885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,1024,0.018634666999181114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,768,0.008609066406885784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,768,0.016116266449292503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,1024,0.008711466193199157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,512,0.00653546651204427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,768,0.006902400155862172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,512,0.01507306694984436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,256,0.0035861333211263022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,512,0.005774933099746704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,256,0.015001599987347921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,256,0.005093333125114441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,128,0.0032640000184377036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,128,0.014499200383822122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,64,0.0030261332790056865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,10240,128,0.004774400095144907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,64,0.01476800044377645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,10240,32,0.002977066735426585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,10240,32,0.014865066607793173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,65536,0.37236054738362634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,65536,0.19906026522318523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,16384,0.09217173258463542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,16384,0.0605621337890625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,65536,0.18582720756530763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,12288,0.07030293146769205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,16384,0.05076479911804199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,12288,0.05006080071131388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,10240,0.058797868092854824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,10240,0.04357440074284871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,12288,0.04611733357111613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,10240,0.034322134653727215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,8192,0.05612373352050781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,8192,0.03913173278172811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,8192,0.02943466703097026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,5120,0.031197865804036457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,7168,0.042268800735473636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,7168,0.03484586477279663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,6144,0.03707306782404582
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,6144,0.032339199384053545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,5120,0.029608533779780073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,7168,0.025974400838216144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,5120,0.019808000326156615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,6144,0.02263573408126831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,4096,0.02547733386357625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,4096,0.02617173393567403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,3584,0.022820266087849934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,3584,0.0247978667418162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,4096,0.017052799463272095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,3072,0.020148267348607383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,3584,0.016179200013478598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,3072,0.02328959902127584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,3072,0.01388159990310669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,2560,0.017207467555999757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,2560,0.021976532538731892
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,2048,0.014193066954612732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,2560,0.012503467003504434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,2048,0.02029013236363729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,1536,0.011429333686828613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,1536,0.01909760038057963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,1024,0.008605866630872091
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,2048,0.010812800129254658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,1024,0.00764160007238388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,1536,0.009645866354306538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,1024,0.01591253379980723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,768,0.007257600128650665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,768,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,512,0.005670399963855743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,512,0.015191466609636942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,256,0.0035264000296592714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,768,0.005895466605822245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,256,0.014871467153231302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,256,0.005128533144791921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,128,0.0032437334458033243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,512,0.0055637334783871974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,128,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,64,0.002951466788848241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,64,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,8192,128,0.004758400221665701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,8192,32,0.002919466545184453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,8192,32,0.014657066265741984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,65536,0.3262858708699544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,65536,0.182424529393514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,16384,0.08903253078460693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,16384,0.05618133147557577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,65536,0.16331413586934407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,12288,0.06798293590545654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,12288,0.046463998158772786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,16384,0.045360000928243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,10240,0.054958931605021154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,12288,0.041553068161010745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,10240,0.041303467750549314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,8192,0.05034346580505371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,8192,0.0352351983388265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,8192,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,10240,0.030652799208958942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,7168,0.03962666591008504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,7168,0.033192533254623416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,6144,0.035240534941355386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,7168,0.023150932788848878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,6144,0.03075946569442749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,5120,0.02977919975916545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,5120,0.02759360074996948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,6144,0.02067199945449829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,5120,0.017966934045155845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,4096,0.023116799195607503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,4096,0.024886399507522583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,3584,0.020750933885574342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,4096,0.015513599912325541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,3584,0.02371946573257446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,3072,0.01844266653060913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,3584,0.01409280002117157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,3072,0.022410666942596434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,3072,0.012707199652989706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,2560,0.015757866700490317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,1536,0.01875413258870443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,2560,0.021406932671864828
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,2048,0.013092266519864401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,2048,0.019782400131225585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,2560,0.011766399939854939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,1536,0.010645332932472228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,2048,0.010284800330797832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,1024,0.007997866471608479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,1024,0.016371200482050575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,1536,0.00921493371327718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,768,0.0068810666600863145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,1024,0.006844800213972728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,768,0.015481600165367126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,768,0.006000000238418579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,512,0.004183466732501984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,512,0.015204266707102457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,256,0.0034186666210492453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,256,0.014769066373507181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,512,0.005713066458702088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,256,0.005223466455936432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,128,0.003171200056870779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,128,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,64,0.0028629332780838014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,7168,128,0.004830933113892873
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,64,0.01458346645037333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,7168,32,0.002903466671705246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,7168,32,0.014594133694966635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,65536,0.2796426773071289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,65536,0.15669439633687338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,12288,0.04235519965489705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,16384,0.07482773462931315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,16384,0.05137493213017782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,65536,0.1424832026163737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,12288,0.057146668434143066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,16384,0.04043413400650024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,10240,0.049156268437703446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,10240,0.03812586863835653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,10240,0.027450666824976606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,8192,0.03941440184911092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,8192,0.03533866802851359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,12288,0.03650346597035726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,8192,0.022873600323994957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,5120,0.025312000513076784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,7168,0.03482133150100708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,7168,0.031436800956726074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,6144,0.03030933340390523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,6144,0.028616533676783247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,6144,0.018703999121983846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,7168,0.02090453306833903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,5120,0.02654613256454468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,4096,0.02129279971122742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,4096,0.02430400053660075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,5120,0.016390400131543477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,3584,0.019153066476186118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,3584,0.022974934180577597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,4096,0.013986133535703025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,3072,0.016899200280507405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,3584,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,3072,0.022111999988555908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,3072,0.011567999919255573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,2560,0.014341333508491516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,2560,0.020555732647577922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,2560,0.010980266332626342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,1024,0.007484800120194752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,2048,0.012339199582735699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,2048,0.019474132855733236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,1536,0.009989333152770997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,1536,0.018348799149195353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,2048,0.009639466802279156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,1024,0.01571626663208008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,1024,0.0065738668044408154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,768,0.0063285330931345625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,768,0.015440000096956888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,768,0.005590400099754334
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,1536,0.00865066647529602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,512,0.0038880000511805216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,512,0.015030399958292643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,256,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,256,0.014732799927393594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,256,0.004952533543109894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,512,0.0054730668663978575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,128,0.003053866575161616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,128,0.014528000354766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,6144,128,0.004692266881465912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,64,0.002922666569550832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,64,0.014549332857131957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,6144,32,0.0027850667635599775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,6144,32,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,65536,0.23759039243062338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,65536,0.13601813316345215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,16384,0.06341866652170816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,65536,0.12361173629760742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,16384,0.04503039916356404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,12288,0.04848853349685669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,8192,0.03352853457132975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,12288,0.03731413284937541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,16384,0.03545813163121541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,10240,0.04346239964167277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,12288,0.03207040031750997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,10240,0.03326399922370911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,8192,0.029499733448028566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,10240,0.02446720004081726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,7168,0.028064000606536865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,7168,0.02784213423728943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,6144,0.024758400519688924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,8192,0.020489599307378134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,6144,0.02601813276608785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,7168,0.018683733542760213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,5120,0.021130667130152384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,3584,0.015780267119407655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,5120,0.02397866646448771
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,6144,0.016771199305852254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,4096,0.017458132902781167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,4096,0.022482132911682128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,5120,0.014629333217938741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,3584,0.021342933177947998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,3072,0.013974400361378989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,4096,0.012707199652989706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,3584,0.012113066514333089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,3072,0.020399999618530274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,2560,0.012126933534940083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,2560,0.019715199867884316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,3072,0.01097920040289561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,2048,0.010372266173362732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,2048,0.018703999121983846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,1536,0.00857919951279958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,1536,0.017115734020868936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,2560,0.010366933544476827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,1536,0.007833600044250488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,2048,0.008896000186602275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,1024,0.006788266698519389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,1024,0.015250133474667868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,768,0.005134933193524678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,1024,0.0062826668222745255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,768,0.01515733301639557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,512,0.003643733263015747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,512,0.015106133619944253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,512,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,768,0.005824000140031179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,256,0.003306666761636734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,256,0.01460693379243215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,128,0.002997333308060964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,128,0.014774399995803832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,256,0.005070933202902476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,64,0.0028661333024501802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,64,0.014484266440073649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,5120,128,0.004841599861780802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,5120,32,0.0028618666032950084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,5120,32,0.014405333002408347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,65536,0.186245330174764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,65536,0.10812266667683919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,65536,0.10625387032826741
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,16384,0.049621331691741946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,16384,0.03864853382110596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,16384,0.030898133913675945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,12288,0.03870720068613688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,12288,0.033124266068140666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,10240,0.032816000779469806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,12288,0.028204800685246785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,10240,0.030375466744105024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,8192,0.02714346647262573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,8192,0.026985599597295122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,10240,0.021615999937057494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,8192,0.018057600657145182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,7168,0.024488532543182374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,7168,0.02511253356933594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,6144,0.02169493238131205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,7168,0.016753067572911583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,6144,0.02378773291905721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,5120,0.018902399142583213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,6144,0.01495573321978251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,5120,0.022308266162872313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,5120,0.013186132907867432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,4096,0.015801599621772765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,4096,0.020859734217325846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,3584,0.01316159963607788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,4096,0.011730133493741354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,3584,0.02012373407681783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,3584,0.011273599664370219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,3072,0.01188266674677531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,3072,0.019338667392730713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,3072,0.01011199951171875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,2560,0.010317867000897724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,2560,0.018874667088190713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,2048,0.008788266777992248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,2048,0.017787732680638633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,2560,0.009612799684206644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,1536,0.007593599955240886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,1536,0.015986133615175882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,2048,0.007942399879296621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,1024,0.006001066664854685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,1024,0.01529706617196401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,1536,0.007047466437021891
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,768,0.003957333415746689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,768,0.014918399850527444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,1024,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,768,0.0055861334005991616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,512,0.0035103999078273775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,128,0.014491732915242514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,512,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,256,0.0031744000812371576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,512,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,256,0.01471573313077291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,128,0.002903466671705246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,256,0.004939733445644379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,64,0.0027615999182065325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,4096,128,0.0046528001626332605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,64,0.014737066626548768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,4096,32,0.0027637332677841187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,4096,32,0.0144896000623703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,65536,0.16780907313028973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,65536,0.09956053098042807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,65536,0.0966912031173706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,16384,0.04735146760940552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,16384,0.03618026574452718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,12288,0.03459946711858113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,16384,0.028437334299087524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,8192,0.025553067525227863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,12288,0.03094186584154765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,10240,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,12288,0.02648746569951375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,10240,0.02855573296546936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,10240,0.021206400791803994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,8192,0.02447893420855204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,8192,0.01688213348388672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,7168,0.022233599424362184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,7168,0.02409279942512512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,6144,0.019666133324305217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,7168,0.016260266304016113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,6144,0.022742400566736855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,5120,0.01712426741917928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,6144,0.014571733276049294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,5120,0.021579732497533165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,5120,0.013176533579826354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,4096,0.013142399986584983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,4096,0.020294400056203206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,4096,0.011482666929562886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,3584,0.012053333719571431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,3584,0.019688532749811808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,3072,0.010994133353233338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,3584,0.011205333471298217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,3072,0.018911999464035035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,3072,0.010063999891281128
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,2560,0.00965013305346171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,2560,0.018267732858657838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,2048,0.008303999900817871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,2560,0.009125333031018574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,2048,0.016850133736928306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,1536,0.007123200098673503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,2048,0.007699200014273326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,1536,0.015643733739852905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,1024,0.005145599941412607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,1024,0.015229866902033488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,1536,0.007115733126799266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,768,0.003886933376391729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,1024,0.0059797331690788266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,768,0.015042133132616677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,768,0.005658666789531708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,512,0.0035285333792368567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,512,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,256,0.003138133386770884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,256,0.01477226714293162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,512,0.005399466554323832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,128,0.002902399996916453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,256,0.0048991998036702475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,128,0.014520532886187234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3584,128,0.004780800143877665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,64,0.002674133330583572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,64,0.01474346617857615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3584,32,0.0027562665442625684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3584,32,0.014591999848683677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,65536,0.14369920094807942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,65536,0.0868554671605428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,65536,0.09438506762186685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,16384,0.03959893385569255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,16384,0.03293333252271016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,10240,0.02649173339207967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,16384,0.02800319989522298
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,12288,0.032313599189122515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,12288,0.028816000620524092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,10240,0.026133332649866742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,12288,0.024621866146723428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,8192,0.021921066443125407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,8192,0.025510400533676147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,10240,0.01971199909845988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,7168,0.019774933656056724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,7168,0.023541333278020223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,8192,0.01653439998626709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,6144,0.017451733350753784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,7168,0.014613333344459533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,6144,0.022137600183486938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,6144,0.013876266280810037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,5120,0.015278933445612588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,5120,0.020987733205159505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,4096,0.013706666231155396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,4096,0.01955946683883667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,5120,0.012499200304349263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,3584,0.01250986655553182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,4096,0.01069546639919281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,3584,0.0195413331190745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,3072,0.011255466938018798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,2048,0.007703466713428498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,3584,0.010594133536020916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,3072,0.01846933364868164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,2560,0.008938666184743245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,3072,0.009488000472386678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,2560,0.01695573329925537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,2560,0.008500267068545024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,2048,0.015793066223462424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,2048,0.007607466479142506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,768,0.0038421332836151125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,1536,0.006716800232728322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,1536,0.015662933389345803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,1536,0.006595199803511302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,1024,0.00443200021982193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,1024,0.015315199891726175
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,1024,0.005740800003210703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,768,0.014991999665896098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,512,0.003505066782236099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,768,0.005496533215045929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,512,0.014856533209482829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,512,0.005193600058555603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,256,0.0031295999884605407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,256,0.01461120049158732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,256,0.004984533290068308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,128,0.002980266759792964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,128,0.014379733800888061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,3072,128,0.004851200183232625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,64,0.002713600049416224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,64,0.014685866236686707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,3072,32,0.0027872001131375628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,3072,32,0.014436266819636025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,65536,0.12004480361938477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,65536,0.07525973320007324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,16384,0.03287253379821777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,16384,0.030173865954081218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,65536,0.09333226680755616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,12288,0.02610879937807719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,16384,0.02757546703020732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,12288,0.026301866769790648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,10240,0.022743467489878336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,12288,0.02097599903742472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,10240,0.02472426692644755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,10240,0.019236266613006592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,6144,0.015397333105405173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,7168,0.014173866311709086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,8192,0.018962132930755615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,8192,0.022711465756098427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,7168,0.01726186672846476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,8192,0.016421332955360413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,7168,0.022374399503072104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,6144,0.020885332425435384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,5120,0.013523200154304504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,6144,0.013667200009028116
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,5120,0.020091732343037925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,5120,0.012248532970746358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,3584,0.01046399970849355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,4096,0.011725866794586181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,4096,0.01957226594289144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,2560,0.008374399940172831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,4096,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,3584,0.010903466741243999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,3584,0.018547199169794717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,3072,0.010066133737564088
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,3072,0.017821866273880004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,2560,0.016200533509254454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,3072,0.009032533566157023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,2048,0.00709440012772878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,2048,0.016089600324630738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,2560,0.008447999755541485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,1536,0.005595733225345611
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,2048,0.00688213308652242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,1536,0.01580586632092794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,1024,0.004062933226426443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,512,0.00342399999499321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,1536,0.006739200154940288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,1024,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,1024,0.005809066692988077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,768,0.0037205333511034647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,768,0.01504746675491333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,512,0.014898133277893067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,768,0.005510400235652924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,256,0.00306986669699351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,512,0.005286400020122528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,256,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,128,0.002807466685771942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,256,0.004991999765237173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,128,0.01455573340257009
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,64,0.002713600049416224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2560,128,0.004659200211366018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,64,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2560,32,0.0026730666557947796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2560,32,0.01432319978872935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,65536,0.09612800280253092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,65536,0.0619925340016683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,12288,0.023676800727844238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,65536,0.08558826446533203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,16384,0.02704213261604309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,10240,0.022693334023157756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,10240,0.01813546617825826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,16384,0.02681279977162679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,12288,0.021656533082326256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,16384,0.0254037340482076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,10240,0.018797866503397622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,12288,0.020383999745051066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,8192,0.015574399630228677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,8192,0.02122453252474467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,7168,0.014206932981808982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,8192,0.015358933806419372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,5120,0.019324799378712974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,6144,0.012612266341845193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,7168,0.02069973349571228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,7168,0.014379733800888061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,6144,0.012822399536768595
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,6144,0.019769599040349327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,5120,0.011736533045768738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,4096,0.010240000486373902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,5120,0.011633066336313884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,4096,0.01837973395983378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,3584,0.009526399771372478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,4096,0.010276266932487487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,3584,0.01664426624774933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,3584,0.009512533744176228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,3072,0.00900266667207082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,3072,0.015997866789499916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,2560,0.00747519979874293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,2560,0.016264533003171287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,3072,0.008726400136947633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,2560,0.008006399869918824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,1024,0.004058666775623957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,2048,0.006760533154010773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,2048,0.015804800391197204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,1536,0.004734933376312256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,2048,0.007051733136177063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,1536,0.015343999862670899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,1536,0.006362666686375936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,1024,0.01530346671740214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,768,0.003756800045569738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,1024,0.005684266487757364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,768,0.014890666802724203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,512,0.00335359995563825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,768,0.005377066632111868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,512,0.01478506624698639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,256,0.003011200080315272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,512,0.005167999863624572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,256,0.014626133441925048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,128,0.0028607999285062153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,256,0.00483840008576711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,128,0.01437333325544993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,64,0.00266239990790685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,2048,128,0.0045525332291920986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,64,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,2048,32,0.0027285332481066385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,2048,32,0.014541866381963095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,65536,0.0739786704381307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,65536,0.051636266708374026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,16384,0.022048000494639078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,65536,0.08316160043080648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,16384,0.025250132878621417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,12288,0.01768640081087748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,16384,0.02453546722730001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,12288,0.022530132532119752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,12288,0.019826134045918785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,10240,0.015401599804560342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,7168,0.01927466591199239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,10240,0.020821332931518555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,10240,0.01736853321393331
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,8192,0.013158399860064188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,8192,0.02012373407681783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,8192,0.014978133638699851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,7168,0.012266666690508524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,6144,0.011100799838701884
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,7168,0.013945600390434265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,6144,0.018733867009480796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,5120,0.009987200299898785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,6144,0.012467199563980102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,5120,0.018845866123835243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,4096,0.009010133147239686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,5120,0.011160533626874287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,4096,0.01692053278287252
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,3584,0.00843519965807597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,4096,0.00950933297475179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,3584,0.016597333550453185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,3584,0.008909866213798523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,2560,0.007674666742483775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,2048,0.005388799806435903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,3072,0.007980800171693166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,3072,0.01679146687189738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,3072,0.008005333443482716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,2560,0.007032533486684163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,1024,0.004051200052102407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,2560,0.01614293356736501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,2048,0.015707733233769734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,768,0.003722666700681051
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,2048,0.00687360018491745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,1536,0.004704000055789947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,1536,0.015526400009791056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,1024,0.015090133746465048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,1536,0.006470400094985962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,768,0.014946132898330688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,1024,0.005650133391221364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,512,0.0033226666351159418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,768,0.005400533477465311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,512,0.01477013329664866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,512,0.005099733173847198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,256,0.0030741333961486817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,256,0.014634666840235391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,256,0.004851200183232625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,32,0.014458666245142618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,128,0.0027829334139823914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,128,0.014405333002408347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,64,0.0026933332284291584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1536,128,0.0046293333172798155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1536,64,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1536,32,0.002674133330583572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,65536,0.04937599897384644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,65536,0.039521066347757976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,65536,0.08296000162760417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,12288,0.01978666583697001
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,16384,0.01548799971739451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,16384,0.021335466702779134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,16384,0.024473599592844644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,12288,0.013194666306177775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,12288,0.01965013345082601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,10240,0.011921067039171855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,10240,0.01946773330370585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,10240,0.01721386710802714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,8192,0.010273067156473796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,8192,0.0183242658774058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,7168,0.009751466910044353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,8192,0.01471466620763143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,5120,0.017937066157658894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,7168,0.018150399128595986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,6144,0.00869866708914439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,7168,0.013193600376447043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,6144,0.017072000106175742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,5120,0.00843946635723114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,6144,0.011834667126337687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,4096,0.007249066730340321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,4096,0.01709866722424825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,3072,0.016634666919708253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,5120,0.010388267040252686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,3584,0.007100800176461537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,4096,0.009198932846387228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,3584,0.016875733931859337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,3072,0.006321066617965698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,3584,0.008821333448092144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,3072,0.008016000191370647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,2560,0.006111999849478403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,2560,0.01613866686820984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,2560,0.007640533149242401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,2048,0.005339733262856802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,2048,0.015849600235621132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,2048,0.0068906664848327635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,1536,0.004741333425045013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,1536,0.015667200088500977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,1536,0.006379733482996623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,1024,0.003945599993069967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,1024,0.015140266219774882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,1024,0.005703466633955637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,768,0.0036661334335803984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,768,0.014885333180427552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,768,0.005377066632111868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,128,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,128,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,512,0.0033290666838486993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,64,0.0029877332349618276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,512,0.014870400230089823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,256,0.003081600119670232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,256,0.01448319951693217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,256,0.004805333415667216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,512,0.0051018665234247845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,64,0.014735999703407287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,1024,128,0.0046410664916038515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,1024,32,0.002958933264017105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,1024,32,0.014856533209482829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,65536,0.03874666690826416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,65536,0.03434346516927083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,16384,0.01316480040550232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,65536,0.08277973333994547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,16384,0.020167466004689535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,16384,0.024526933828989662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,12288,0.011340799927711486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,12288,0.018710400660832724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,10240,0.01049066682656606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,12288,0.019633066654205323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,10240,0.01832853356997172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,10240,0.016897066434224447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,6144,0.0077909335494041445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,6144,0.017352533340454102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,8192,0.009243733684221904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,8192,0.016977065801620485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,7168,0.008532266815503438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,8192,0.014421332875887552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,7168,0.0176362673441569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,7168,0.013018666704495748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,5120,0.007223466535409291
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,6144,0.011632000406583149
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,5120,0.017534933487574258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,4096,0.006246399879455566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,5120,0.01042560040950775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,3584,0.008894933263460796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,4096,0.017241599162419637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,3584,0.0065311998128890995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,4096,0.009123200178146362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,3584,0.016908800601959227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,3072,0.006119466821352641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,3072,0.01644373337427775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,3072,0.008061866462230682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,1536,0.004738133152325948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,2560,0.006053333481152853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,2560,0.016241066654523215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,2048,0.005419733126958212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,2560,0.007646933197975159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,2048,0.015787733594576518
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,2048,0.006770133475462596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,1536,0.015475199619928996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,1024,0.00396373321612676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,1536,0.006361599763234456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,1024,0.0151637335618337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,1024,0.005649066468079885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,768,0.003740799923737844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,768,0.015024000406265258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,768,0.005313066641489664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,512,0.0034154665966828666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,512,0.015083733201026916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,256,0.003260799994071325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,512,0.005115733544031779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,256,0.014751999576886495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,32,0.002791466563940048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,128,0.0029696000119050344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,256,0.004925866425037384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,128,0.014492799838383993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,768,64,0.002765866617361705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,768,128,0.004730666677157084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,64,0.014724266529083253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,768,32,0.014674133062362671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,65536,0.027352533737818402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,65536,0.02765866716702779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,16384,0.010444800059000652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,65536,0.08316906293233237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,16384,0.019208532571792603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,12288,0.009178666273752849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,10240,0.016706132888793947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,16384,0.02436586618423462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,12288,0.01712426741917928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,12288,0.019206400712331137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,10240,0.00865066647529602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,10240,0.01760960022608439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,8192,0.007733333110809326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,8192,0.017228800058364867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,7168,0.006797866523265838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,7168,0.017825067043304443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,8192,0.014064000050226847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,6144,0.006109866499900818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,4096,0.00590826670328776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,7168,0.012955733140309653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,6144,0.017194666465123496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,5120,0.006260266900062561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,6144,0.011636267105738323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,5120,0.018088533480962118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,4096,0.017283199230829875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,5120,0.01053439974784851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,3584,0.006347733239332835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,4096,0.009169066945711773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,3584,0.01715839902559916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,3072,0.005970133344332377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,3584,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,3072,0.016721065839131674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,2560,0.00619946668545405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,3072,0.00793280005455017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,2560,0.016489600141843162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,2048,0.005401599903901418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,2048,0.016005333264668783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,1024,0.015238400300343832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,2560,0.007672533392906189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,768,0.003786666691303253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,1536,0.004756266872088114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,768,0.015005866686503092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,1536,0.015738667050997416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,512,0.014920533696810404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,2048,0.006676266590754191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,1024,0.004152533411979675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,1536,0.00633493314186732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,1024,0.005643733342488607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,128,0.002898133297761281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,768,0.005239466826121012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,512,0.0034261333445707956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,256,0.003125333289305369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,256,0.014814933141072592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,512,0.005166933437188466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,128,0.014522666732470194
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,256,0.004841599861780802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,64,0.0027477333943049112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,512,128,0.004586666822433472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,64,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,16384,0.017478400468826295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,512,32,0.0027647999425729113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,512,32,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,65536,0.01713386575380961
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,65536,0.022759467363357544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,65536,0.08286826610565186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,16384,0.007196799914042156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,12288,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,8192,0.006154666841030121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,16384,0.023497599363327026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,12288,0.017892267306645712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,10240,0.006223999957243601
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,12288,0.018657066424687705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,10240,0.01763413349787394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,8192,0.01699519952138265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,10240,0.01641493340333303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,7168,0.006019199887911478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,8192,0.014030933380126953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,7168,0.017735467354456583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,6144,0.00588266650835673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,6144,0.017284266153971353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,7168,0.012950399518013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,5120,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,6144,0.011572266618410747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,5120,0.01795626680056254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,5120,0.010478933652242024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,4096,0.0057429333527882894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,4096,0.017334399620691936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,4096,0.009253333012262981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,3584,0.006409599880377452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,3584,0.016822399695714314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,3072,0.005899733304977417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,3072,0.01636799971262614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,3584,0.008664533495903015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,3072,0.008010666569073994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,2560,0.005977599819501241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,1536,0.015455999970436096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,2560,0.016204800208409628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,2048,0.005332266787687937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,2560,0.0076223999261856076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,2048,0.015836800138155617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,2048,0.006785066425800323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,1536,0.0045962666471799215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,1024,0.004042666653792063
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,1536,0.006274133423964183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,512,0.014850133657455444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,1024,0.015126400192578635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,768,0.0036831999818483984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,1024,0.0055402666330337524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,768,0.015068800250689188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,512,0.0032960000137488045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,768,0.00520000010728836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,256,0.003053866575161616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,512,0.005021866659323374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,256,0.014591999848683677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,128,0.002898133297761281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,128,0.014582399527231851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,256,0.004779733220736186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,64,0.002629333237806956
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,256,128,0.004657066861788432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,16384,0.005895466605822245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,64,0.014439466595649719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,256,32,0.002656000107526779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,256,32,0.014550399780273438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,65536,0.01162453293800354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,65536,0.019345066944758096
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,16384,0.01728746692339579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,65536,0.08243199984232584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,12288,0.005888000130653381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,16384,0.02365866700808207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,12288,0.01695786714553833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,12288,0.01866133411725362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,10240,0.006044800082842508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,10240,0.01754986643791199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,8192,0.00592853327592214
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,10240,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,8192,0.016924800475438435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,7168,0.005767466624577841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,8192,0.01402453382809957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,7168,0.017614932854970296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,7168,0.012878933548927307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,6144,0.005628799895445505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,6144,0.017516799767812095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,6144,0.01146986683209737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,5120,0.006007466713587443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,5120,0.01753066579500834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,5120,0.010439466436703999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,4096,0.005739733576774597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,3072,0.016009599963823954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,4096,0.017062399784723917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,3584,0.005989333490530649
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,3584,0.016645333170890807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,4096,0.009164800246556599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,2048,0.005108266572157542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,3072,0.005682133138179779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,3584,0.008642133076985676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,2560,0.005693866809209188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,1536,0.01541759967803955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,2560,0.01601066688696543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,3072,0.007973333199818928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,2048,0.015698132912317912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,2560,0.00754559983809789
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,2048,0.0066101332505544026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,1536,0.004485333462556204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,1024,0.0037834666669368743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,1024,0.015362133582433065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,1536,0.006376533210277558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,1024,0.005751466751098633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,768,0.0036469332873821257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,768,0.014963199694951376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,768,0.005223466455936432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,512,0.0033546666304270422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,512,0.014739200472831726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,512,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,64,0.0026186667382717133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,256,0.003011200080315272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,256,0.014472533265749613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,128,0.0028266665836175283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,128,0.014259200294812521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,256,0.004844800134499868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,64,0.014626133441925048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,4,128,128,0.004530133306980133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,128,32,0.0026869334280490874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,128,32,0.014516266187032065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,65536,0.00902826686700185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,10240,0.017340799172719322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,65536,0.018320000171661376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,16384,0.00572266678015391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,16384,0.01714026729265849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,12288,0.005782400071620941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,12288,0.01699840029080709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,10240,0.005862399935722351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,8192,0.005831466615200042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,8192,0.01660693287849426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,7168,0.005658666789531708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,7168,0.017391999562581383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,6144,0.005550933380921682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,6144,0.017288533846537273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,5120,0.005870933334032694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,5120,0.017356799046198527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,4096,0.005756799876689911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,4096,0.016739199558893837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,3584,0.005995733539263407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,3584,0.01685546636581421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,3072,0.005672533313433329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,3072,0.01632213294506073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,2560,0.005625600119431814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,2560,0.0157642662525177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,2048,0.004996266464392344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,2048,0.015614933768908181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,1536,0.004421333471934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,1536,0.015281066298484802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,1024,0.003756800045569738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,1024,0.01511146624883016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,768,0.003505066782236099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,768,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,512,0.014812800288200378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,512,0.0032138665517171226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,256,0.0029365333418051405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,256,0.014392532904942832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,128,0.002868266652027766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,128,0.0145578662554423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,64,0.002603733291228612
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,64,0.014406399925549826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,12288,0.005714133381843567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,64,32,0.002614400039116542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,64,32,0.01437440017859141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,65536,0.007863466441631318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,65536,0.019067732493082683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,16384,0.005738666653633118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,16384,0.017621332406997682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,12288,0.016953599452972413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,10240,0.005790933469931285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,10240,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,8192,0.005769599974155426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,5120,0.017222400506337485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,4096,0.0055167997876803074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,8192,0.016796799500783284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,3584,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,4096,0.016659200191497803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,7168,0.005618133147557576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,7168,0.017373865842819212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,6144,0.00562666654586792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,6144,0.01694613297780355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,5120,0.005901866654555003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,2048,0.015595733126004537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,3584,0.016327466567357382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,1536,0.004386133452256521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,1024,0.003852800031503042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,3072,0.0056309332450230915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,3072,0.01594239970048269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,2560,0.005530666808287303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,2560,0.015802666544914246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,2048,0.005044266581535339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,1536,0.015316266814867655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,1024,0.014792533715566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,768,0.003472000112136205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,768,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,512,0.0031925333042939507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,512,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,256,0.0028938665986061097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,256,0.01455466647942861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,128,0.0027200000981489818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,128,0.01411946713924408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,64,0.002569599946339925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,12288,0.5639861424763997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,64,0.014344533284505209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,4,32,32,0.0025759999950726825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,4,32,32,0.014201600352923074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,16384,0.747100830078125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,16384,0.3931232134501139
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,16384,0.35997438430786133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,12288,0.29665279388427734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,10240,0.4742111841837565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,12288,0.2722741444905599
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,10240,0.24838186899820963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,8192,0.3837098757425944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,8192,0.20417493184407554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,10240,0.2254986604054769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,7168,0.332914129892985
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,7168,0.178984530766805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,8192,0.1788874626159668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,6144,0.28396479288736975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,7168,0.15922880172729492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,6144,0.17373119990030925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,5120,0.23624107042948403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,6144,0.1381440003712972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,5120,0.13283519744873046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,4096,0.19266133308410643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,5120,0.12463146845499676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,4096,0.10998400052388509
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,4096,0.09279253482818603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,2560,0.12019413312276203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,3584,0.16672852834065754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,3584,0.09761599699656168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,3584,0.08207680384318033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,3072,0.14406827290852864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,3072,0.09218453566233317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,3072,0.07113386789957682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,2560,0.07408106327056885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,1536,0.03889600038528442
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,2048,0.0963200012842814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,2560,0.06040639877319336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,2048,0.06286506652832032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,2048,0.05804479916890463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,1536,0.07768747011820475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,1536,0.051186132431030276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,1024,0.050063999493916836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,1024,0.03885866800944011
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,768,0.03907626469930013
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,768,0.03350293238957723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,1024,0.027910399436950683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,512,0.026738133033116656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,768,0.022502400477727256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,512,0.027051732937494917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,512,0.018295466899871826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,128,0.00938986639181773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,256,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,32,0.004456533491611481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,64,0.01840426723162333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,256,0.02081600030263265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,65536,256,0.011860266327857971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,128,0.009071999788284301
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,128,0.01991893251736959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,65536,64,0.006595199803511302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,65536,32,0.01854506731033325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,65536,0.7483893076578776
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,65536,0.37704105377197267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,16384,0.18440532684326172
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,16384,0.10640532970428467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,10240,0.07211200396219888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,65536,0.35365546544392906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,16384,0.09323093096415201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,12288,0.14095253944396974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,12288,0.08420159816741943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,12288,0.08381760120391846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,10240,0.13774293263753254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,10240,0.06052159865697225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,7168,0.04413226842880249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,8192,0.09413759708404541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,8192,0.06941760381062825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,7168,0.08276906808217367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,8192,0.049428268273671465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,7168,0.055239466826121006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,5120,0.03310186664263408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,4096,0.048648532231648764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,6144,0.07192959785461425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,6144,0.04989546537399292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,5120,0.06010773181915283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,5120,0.04365333318710327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,6144,0.03866666555404663
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,4096,0.03794240156809489
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,4096,0.02760639985402425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,3584,0.04305386543273926
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,3584,0.03528106609980265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,3584,0.02490880091985067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,3072,0.03743786811828613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,3072,0.03236053387324016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,3072,0.022107734282811483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,1536,0.020286933581034342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,2560,0.03155200084050496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,2560,0.02975040078163147
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,2048,0.02581973274548848
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,2048,0.0264629324277242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,2560,0.019692800442377725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,2048,0.01618346671263377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,1536,0.023520000775655112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,1024,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,1536,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,1024,0.020491733153661092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,1024,0.010588799913724262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,768,0.013261866569519044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,768,0.019014400243759156
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,768,0.009142399827639262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,512,0.008684800068537394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,512,0.01713599960009257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,512,0.007292800148328145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,256,0.005666133264700572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,256,0.01525973379611969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,256,0.005433600147565206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,128,0.0034133332471052804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,128,0.014657066265741984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,64,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,64,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,16384,128,0.004950400193532308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,16384,32,0.0031701333820819853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,16384,32,0.014793599645296732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,65536,0.5531530380249023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,65536,0.2911989212036133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,16384,0.14374079704284667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,16384,0.08622399965922037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,10240,0.059614932537078856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,65536,0.28202133178710936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,12288,0.1099573294321696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,12288,0.06725333531697592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,16384,0.07388799985249837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,7168,0.06604799826939901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,7168,0.04528319835662842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,10240,0.10033280054728191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,12288,0.06739412943522136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,8192,0.07495573361714682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,8192,0.05525973240534464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,5120,0.04527466694513957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,10240,0.048369065920511885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,8192,0.04059946537017822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,7168,0.03579733371734619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,6144,0.05827413400014242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,6144,0.04062186479568482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,6144,0.03107946713765462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,5120,0.036347734928131106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,3584,0.030078933636347456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,4096,0.022715733448664347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,5120,0.027111466725667315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,4096,0.037357866764068604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,4096,0.03244266708691915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,3584,0.03348373174667359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,3072,0.02872213323911031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,3072,0.02810773253440857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,3584,0.020849066972732543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,2560,0.024225066105524697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,3072,0.0183242658774058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,2560,0.025204267104466754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,2048,0.020108799139658608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,1024,0.012219732999801636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,2560,0.016307199994723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,2048,0.023386667172114052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,1536,0.015826132893562318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,1536,0.021545600891113282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,2048,0.013533866405487061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,512,0.007490133245786031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,1536,0.011669333775838215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,512,0.005751466751098633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,1024,0.01925333340962728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,768,0.00974506636460622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,1024,0.009131733576456707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,768,0.018067200978597008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,768,0.008164266745249431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,128,0.004801066716512045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,64,0.0032416000962257386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,512,0.015602133671442666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,256,0.004126933217048645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,256,0.0150026669104894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,128,0.003479466587305069
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,12288,256,0.005293866495291392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,65536,0.24699734052022299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,128,0.014838400483131408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,64,0.014889599879582724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,12288,32,0.0031680000325044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,12288,32,0.014773333072662353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,12288,0.058627200126647946
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,65536,0.4728554725646973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,16384,0.12360959847768146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,16384,0.07517866293589273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,65536,0.23820266723632813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,16384,0.06317760149637858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,12288,0.09357546965281169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,10240,0.07882026831309
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,10240,0.05724586645762125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,12288,0.04856319824854533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,10240,0.0405781348546346
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,8192,0.06387840112050375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,8192,0.048868266741434734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,7168,0.053407998879750576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,8192,0.03530346552530925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,7168,0.040329599380493165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,6144,0.046777598063151044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,7168,0.03120959997177124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,6144,0.037701332569122316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,6144,0.02739306688308716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,5120,0.03919680118560791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,3584,0.027455999453862505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,4096,0.0202346662680308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,5120,0.03313386638959249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,5120,0.023422932624816893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,3072,0.016386133432388306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,4096,0.031541333595911665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,4096,0.02922240098317464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,3584,0.02905813256899516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,3584,0.01831573247909546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,3072,0.024654932816823325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,3072,0.025458133220672606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,1536,0.01376426617304484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,1536,0.02028586665789286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,2560,0.020875734090805054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,2560,0.02376426657040914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,2560,0.014612266421318054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,1024,0.008663466572761536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,2048,0.017280000448226928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,2048,0.023588265975316366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,2048,0.012502400080362954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,1024,0.010123733679453533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,1536,0.010683733224868774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,1024,0.018311466773351034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,768,0.008461866776148479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,768,0.016311466693878174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,512,0.006718933085600535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,768,0.006915199756622315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,512,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,512,0.005740800003210703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,256,0.0035861333211263022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,256,0.014822399616241455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,128,0.003293866664171219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,256,0.005122133096059163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,128,0.014924800395965577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,10240,128,0.004773333172003428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,64,0.0029909332593282064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,64,0.014664533734321594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,10240,32,0.002903466671705246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,10240,32,0.014674133062362671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,65536,0.3717642784118652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,65536,0.19835093816121419
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,65536,0.18510400454203288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,16384,0.09218986829121908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,10240,0.04358506600062052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,16384,0.060275201002756754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,16384,0.0529365340868632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,12288,0.07032960255940755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,12288,0.05550933281580607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,12288,0.04006186723709106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,10240,0.06107626756032308
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,8192,0.04770666758219401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,10240,0.0345578670501709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,8192,0.03828159968058269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,7168,0.04204800128936768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,8192,0.028243199984232588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,7168,0.03477546771367391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,7168,0.025756800174713136
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,6144,0.03718186616897583
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,6144,0.0327839990456899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,5120,0.03115839958190918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,5120,0.029389866193135578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,6144,0.023000532388687135
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,5120,0.019733333587646486
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,4096,0.025509333610534667
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,4096,0.02613226572672526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,3584,0.022759467363357544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,4096,0.017221333583196004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,3584,0.024875734249750772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,3072,0.020091732343037925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,3584,0.015579733252525329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,3072,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,3072,0.013964800039927163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,2560,0.017181867361068727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,2560,0.021995733181635536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,2048,0.014171733458836874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,2048,0.020388267437616982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,2560,0.012647466858228049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,1536,0.011543466647466024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,2048,0.011166933178901672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,1536,0.019242666165033975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,1536,0.009706667065620423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,512,0.005669333537419637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,512,0.015330132842063905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,1024,0.008575999736785888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,1024,0.017525333166122436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,1024,0.00738560010989507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,768,0.007328000168005626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,768,0.016017066438992818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,768,0.006097066899140676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,256,0.0036245333651701607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,512,0.005510400235652924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,256,0.014892799655596414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,256,0.0050122668345769245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,128,0.003159466634194056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,128,0.014765866597493491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,64,0.002841600030660629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,8192,128,0.004683733483155568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,64,0.014647466937700906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,8192,32,0.002959999938805898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,8192,32,0.014722133676211039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,65536,0.3238442738850912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,65536,0.17963520685831708
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,16384,0.08820693492889405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,10240,0.05975786844889323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,65536,0.1629759947458903
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,16384,0.056049064795176184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,12288,0.06735040346781412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,16384,0.04544639984766642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,12288,0.04628906647364299
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,10240,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,12288,0.041127467155456544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,10240,0.030795733133951824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,8192,0.050061865647633874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,8192,0.03552533388137817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,7168,0.03944960037867228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,8192,0.02539413372675578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,7168,0.03317013382911682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,6144,0.0353653351465861
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,5120,0.01812053322792053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,7168,0.023231999079386393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,6144,0.030359466870625813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,4096,0.015440000096956888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,5120,0.029496532678604127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,6144,0.02058560053507487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,5120,0.027834665775299073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,4096,0.022974934180577597
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,3072,0.022538665930430093
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,4096,0.025187200307846068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,3072,0.012719999750455221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,2560,0.021384533246358237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,2048,0.013131733735402426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,2560,0.011577600240707397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,3584,0.020592000087102255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,3584,0.023825067281723022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,3584,0.014217600226402283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,3072,0.018186666568120322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,2560,0.015659733613332113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,2048,0.020228266716003418
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,2048,0.010263466835021972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,1536,0.010789333780606588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,1536,0.018678400913874307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,1024,0.008137600123882293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,1536,0.009101866682370504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,1024,0.016103466351826988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,1024,0.006727466483910878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,256,0.0034495999415715536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,768,0.006896000107129414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,768,0.01497066617012024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,768,0.005924266576766968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,512,0.004198400179545084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,512,0.015044266978899637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,64,0.0028405333558718365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,512,0.00559146652619044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,64,0.014616533120473226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,256,0.014900267124176025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,256,0.005100800096988678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,128,0.003160533308982849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,128,0.014706133802731832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,7168,128,0.004886400202910105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,7168,32,0.002856533229351044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,7168,32,0.014612266421318054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,65536,0.2792138735453288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,65536,0.1555967966715495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,16384,0.07445226510365804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,65536,0.14196160634358723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,16384,0.0515498677889506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,12288,0.05680319865544638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,16384,0.04042453368504842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,12288,0.04279146591822307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,12288,0.03639359871546428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,10240,0.04877013365427653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,10240,0.03744106690088908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,8192,0.039690665404001874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,10240,0.031005867322285968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,8192,0.0332202672958374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,7168,0.035070931911468504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,8192,0.0228383998076121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,7168,0.030820266405741377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,7168,0.020830933252970377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,6144,0.030364799499511718
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,6144,0.028489599625269573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,5120,0.02521173357963562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,4096,0.01395626664161682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,6144,0.018756266434987387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,5120,0.026579199234644572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,4096,0.021412267287572225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,5120,0.016300800442695617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,4096,0.024026666084925333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,2560,0.014688000082969666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,3584,0.019126399358113607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,3584,0.022950400908788048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,3584,0.0130431999762853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,3072,0.016692266861597697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,3072,0.02181333303451538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,3072,0.011823999881744384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,2560,0.02063573400179545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,2560,0.010870400071144103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,2048,0.012300800283749897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,2048,0.019290665785471596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,2048,0.009478400150934856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,1536,0.009948800007502239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,1536,0.018225065867106118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,1024,0.007560533285140991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,1536,0.00869653324286143
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,1024,0.01527679959932963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,1024,0.0061152001221974695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,768,0.006257066627343495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,768,0.015466666221618653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,512,0.004018133382002512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,768,0.005897599955399831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,512,0.015086932977040609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,512,0.005329066514968872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,256,0.0033503999312718712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,256,0.014620799819628397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,256,0.004934399823347728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,128,0.0030645333230495454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,128,0.01456000010172526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,6144,128,0.004690133531888326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,64,0.0028394666810830434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,64,0.01458133359750112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,6144,32,0.002867199977238973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,6144,32,0.014568533500035605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,65536,0.23663786252339683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,65536,0.1352959950764974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,16384,0.06237546602884928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,65536,0.12267306645711262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,16384,0.04551146825154622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,16384,0.03529706796010335
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,8192,0.034167468547821045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,12288,0.04831999937693278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,12288,0.03694186607996623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,12288,0.03180053234100342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,10240,0.040749867757161454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,10240,0.033470932642618814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,10240,0.027435733874638872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,8192,0.02955946723620097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,8192,0.020292266209920248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,7168,0.02810666759808858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,7168,0.02746773362159729
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,6144,0.02502506573994954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,7168,0.018758400281270345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,6144,0.02608426610628764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,6144,0.01676693360010783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,5120,0.021418666839599608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,5120,0.02401706576347351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,5120,0.014507733782132468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,3072,0.01394773324330648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,4096,0.017326933145523072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,4096,0.02224853237469991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,4096,0.012681600451469422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,3584,0.0155157337586085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,3584,0.021409066518147786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,2048,0.010259200135866802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,3584,0.01206719974676768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,3072,0.02035520076751709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,2560,0.012058666348457337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,3072,0.010939733187357584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,2560,0.01968533396720886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,2560,0.010160000125567118
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,2048,0.018529067436854042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,2048,0.008973866701126099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,1536,0.008618666728337606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,1536,0.01605226695537567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,1024,0.006782933572928111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,512,0.003655466685692469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,1536,0.007577600081761678
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,1024,0.015591466426849365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,256,0.0032458665470282235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,1024,0.0062730665008227035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,256,0.014779733618100485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,768,0.005224533379077911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,128,0.014652799566586813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,256,0.005094400048255921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,768,0.015228799978892007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,512,0.015094400445620219
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,768,0.005773866673310598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,512,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,65536,0.18562560081481932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,128,0.0029525332152843474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,5120,128,0.004681600133577982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,64,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,64,0.014553599556287131
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,5120,32,0.0028533334533373516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,12288,0.03879040082295736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,5120,32,0.014605866869290671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,65536,0.10734612941741943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,65536,0.10567253430684406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,16384,0.049510399500528976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,16384,0.03840853373209636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,16384,0.031096533934275312
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,12288,0.03299946586290996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,10240,0.032679466406504314
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,12288,0.02467199961344401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,10240,0.0301418662071228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,8192,0.027036799987157183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,7168,0.016724266608556113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,6144,0.02408533294995626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,10240,0.024459733565648397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,8192,0.026921600103378296
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,8192,0.018347734212875368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,7168,0.0245088001092275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,7168,0.02539199988047282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,6144,0.02180160085360209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,6144,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,5120,0.018956800301869713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,5120,0.0224671999613444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,4096,0.01575146714846293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,5120,0.013268267114957174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,4096,0.021005866924921672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,3584,0.013196800152460733
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,3584,0.020269866784413657
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,4096,0.011404800415039062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,3584,0.01113813320795695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,3072,0.011774933338165284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,2048,0.017683200041453042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,3072,0.01953386664390564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,3072,0.009988266229629516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,2560,0.010431999961535137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,2560,0.01858133276303609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,1024,0.005852800110975901
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,2560,0.00950933297475179
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,1024,0.015178666512171427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,2048,0.008941866954167684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,1536,0.007574399809042613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,1536,0.015770666797955833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,2048,0.008205866813659668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,1536,0.006920533378918965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,768,0.003985066711902618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,768,0.015006933609644571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,1024,0.00594346672296524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,512,0.005283200244108836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,512,0.0035242666800816857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,128,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,512,0.014940800269444785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,768,0.005502933263778686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,256,0.003190399954716364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,256,0.014663466811180114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,256,0.0047872001926104225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,128,0.002994133283694585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,64,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,65536,0.09870399634043375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,64,0.014502400159835815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,4096,32,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,4096,128,0.004726399978001913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,12288,0.03501439889272054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,4096,32,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,65536,0.1673301378885905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,16384,0.04689493179321289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,16384,0.03606933355331421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,65536,0.10291840235392254
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,16384,0.03006400068600972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,12288,0.03065813382466634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,10240,0.02953280011812846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,12288,0.022907733917236328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,10240,0.029100799560546876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,8192,0.024848000208536784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,10240,0.021016534169514975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,8192,0.025254400571187337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,7168,0.022035199403762817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,7168,0.024009599288304647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,8192,0.017921066284179686
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,6144,0.019347200791041054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,7168,0.015301332871119181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,6144,0.022703999280929567
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,5120,0.01708266735076904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,6144,0.014648532867431641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,5120,0.02159893314043681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,4096,0.013290666540463767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,5120,0.012905599673589072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,4096,0.020266666014989217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,3584,0.012045866250991822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,4096,0.010949333508809406
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,3584,0.019716266791025797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,3072,0.010846933722496033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,3584,0.010993066430091857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,3072,0.019067732493082683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,2560,0.009565866986910502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,2560,0.018398932615915933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,3072,0.009832533200581868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,2048,0.008392533659934998
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,2560,0.009139200051625569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,2048,0.01715946594874064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,1536,0.00718506673971812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,2048,0.007771733403205872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,1536,0.015362133582433065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,1536,0.006491733094056447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,1024,0.00477866679430008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,1024,0.015460266669591268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,768,0.003853866706291834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,256,0.0031306666632493338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,768,0.015228799978892007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,1024,0.005702400207519531
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,512,0.003554133325815201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,512,0.01492800017197927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,512,0.00522986650466919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,768,0.005659733215967814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,256,0.014713600277900696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,128,0.0028757333755493166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,256,0.005044266581535339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,128,0.014482133587201438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,64,0.0027114666998386385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3584,128,0.004635733366012573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,65536,0.08921279907226562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,64,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3584,32,0.002734933296839396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3584,32,0.014455466469128927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,65536,0.1425546646118164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,65536,0.08604693412780762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,16384,0.03943039973576863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,16384,0.03298453291257222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,12288,0.03128640055656433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,8192,0.021755733092625937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,16384,0.026337067286173504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,12288,0.028562132517496747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,10240,0.029778132836023968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,10240,0.026143999894460042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,7168,0.0234933336575826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,12288,0.02249493400255839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,8192,0.02459733287493388
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,10240,0.01869973341623942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,7168,0.01967039903004964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,7168,0.015331199765205384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,8192,0.016552533706029257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,6144,0.017411200205485027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,6144,0.02219840089480082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,5120,0.015174399813016257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,6144,0.013854933778444925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,5120,0.020827732483545938
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,4096,0.013843199610710144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,5120,0.01202133297920227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,4096,0.019640533129374187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,4096,0.011103999614715577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,3584,0.012550399700800578
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,3584,0.019054933389027914
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,3072,0.011230933666229247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,3584,0.01062506635983785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,3072,0.018710400660832724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,3072,0.009251200159390767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,2560,0.008943999807039898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,2560,0.017429333925247193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,1536,0.00650133341550827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,2560,0.008796800176302593
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,1024,0.015180800358454385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,2048,0.007714133461316426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,2048,0.015768532951672874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,1536,0.006659199794133504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,2048,0.007602133353551229
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,1536,0.015546666582425437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,1024,0.004410666724046072
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,768,0.003905066599448522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,1024,0.0058037335673968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,768,0.015147733688354491
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,512,0.003623466690381368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,768,0.00553706685702006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,512,0.015033599734306336
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,256,0.0031658666829268134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,512,0.005195733408133189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,256,0.014523733655611673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,256,0.005026133358478546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,128,0.002882133424282074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,128,0.014605866869290671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,64,0.0027295999228954316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,3072,128,0.004713599880536398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,64,0.014390400052070618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,16384,0.030014934142430623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,3072,32,0.0029685333371162414
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,12288,0.025862399737040204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,3072,32,0.014512000481287637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,65536,0.11902720133463543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,65536,0.07475199699401855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,65536,0.09306773344675699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,16384,0.03278506596883138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,10240,0.019030400117238364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,12288,0.026293333371480303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,16384,0.02765226761500041
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,12288,0.020541866620381675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,10240,0.022523732980092366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,10240,0.024375466505686443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,8192,0.018900267283121743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,8192,0.02258560061454773
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,8192,0.016502400239308677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,5120,0.01341546674569448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,7168,0.017087999979654947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,7168,0.02179626623789469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,7168,0.014134400089581809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,6144,0.015304533640543619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,6144,0.021025067567825316
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,4096,0.01034986674785614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,3584,0.010813867052396137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,5120,0.019980800151824952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,6144,0.013678933183352152
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,4096,0.01167146662871043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,5120,0.012359467148780823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,4096,0.018910932540893554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,3584,0.0184608002503713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,3584,0.009987200299898785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,3072,0.010045866171518963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,3072,0.017826133966445924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,3072,0.009141332904497783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,2560,0.008249600231647492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,2560,0.01614293356736501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,2560,0.008317866424719492
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,1024,0.004156800111134847
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,2048,0.007108266651630402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,2048,0.015876266360282897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,1536,0.005723733206590017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,2048,0.0067562664548556015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,1536,0.015603199601173401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,1536,0.006488533318042755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,512,0.0034624000390370687
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,1024,0.01514346698919932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,512,0.005245866874853769
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,1024,0.005857066810131073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,768,0.00384853333234787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,128,0.0028234665592511495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,768,0.014978133638699851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,768,0.005508266886075338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,64,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,512,0.01476479967435201
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,256,0.003197866678237915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,256,0.014601600170135499
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,256,0.004925866425037384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,128,0.014509866635004679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2560,128,0.00470719983180364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,64,0.014547200004259745
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2560,32,0.002773333340883255
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2560,32,0.014632532993952433
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,65536,0.09617173671722412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,65536,0.06067306598027548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,65536,0.08524373372395834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,16384,0.02704426646232605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,10240,0.02262079914410909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,16384,0.02675519982973735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,16384,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,12288,0.021594667434692384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,12288,0.02381653388341268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,10240,0.018764799833297728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,12288,0.020566399892171225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,10240,0.017816533644994102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,8192,0.015496533115704855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,8192,0.02161173423131307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,7168,0.014310399691263834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,7168,0.020679465929667153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,8192,0.01566933294137319
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,7168,0.014153599739074707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,6144,0.01286720037460327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,6144,0.02003306746482849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,5120,0.011798399686813354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,6144,0.0126720001300176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,5120,0.01947306593259176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,5120,0.011720533172289532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,4096,0.01027413308620453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,4096,0.01849386692047119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,4096,0.01014293332894643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,3584,0.009617066383361817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,3584,0.01680533289909363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,3584,0.009181867043177288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,3072,0.009035733342170716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,3072,0.01644159952799479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,2560,0.00749120016892751
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,3072,0.008371200164159138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,2560,0.016229333480199178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,2560,0.007773866752783458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,2048,0.006693333387374878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,2048,0.015745066603024802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,1536,0.004717866579691568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,768,0.0036917333801587426
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,2048,0.006774400174617767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,1536,0.015448533495267234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,1536,0.006390400230884552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,1024,0.004023466755946477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,1024,0.01511573294798533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,1024,0.005602133274078369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,768,0.014942933122316995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,768,0.005350400010744731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,128,0.002844800055027008
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,512,0.0033781332274278007
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,512,0.014753066500027976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,512,0.005046399931112925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,256,0.002979200085004171
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,256,0.014528000354766845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,256,0.0047872001926104225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,128,0.014327466487884521
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,2048,128,0.004658133288224538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,64,0.0026506667335828146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,64,0.014317867159843446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,2048,32,0.00264533335963885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,2048,32,0.014469333489735923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,65536,0.07339733441670736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,65536,0.050324265162150064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,65536,0.08290239969889322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,16384,0.021996800104777017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,16384,0.024065067370732628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,12288,0.01774079998334249
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,12288,0.021891200542449953
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,16384,0.024632533391316734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,12288,0.019690666596094766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,10240,0.01537493367989858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,7168,0.019208532571792603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,10240,0.020780799786249797
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,8192,0.012970667084058127
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,6144,0.018798933426539103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,10240,0.017672532796859743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,8192,0.019773866732915243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,7168,0.01220906674861908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,8192,0.014941866199175516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,7168,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,6144,0.011002666751543681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,5120,0.0100490669409434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,5120,0.019016534090042114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,6144,0.012544000148773193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,4096,0.009064533313115438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,4096,0.016934400796890257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,5120,0.01114026705423991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,4096,0.009292800227801006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,3584,0.008590933680534363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,3584,0.016636799772580466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,3072,0.007942399879296621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,3584,0.00909866690635681
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,3072,0.016501333316167197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,3072,0.007942399879296621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,2560,0.0068800002336502075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,2560,0.01621119976043701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,2048,0.005480533341566721
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,2560,0.007578666508197785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,2048,0.015837867061297098
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,2048,0.006769066552321117
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,1536,0.0047199999292691554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,1536,0.015365333358446757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,1024,0.004010666658480962
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,1536,0.006397866706053417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,1024,0.015153066317240397
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,1024,0.005618133147557576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,768,0.003640533238649368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,768,0.015015467007954916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,512,0.0033471999069054925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,768,0.005218133330345154
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,512,0.014863999684651694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,512,0.005036800106366476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,256,0.0031082667410373688
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,256,0.014524799585342408
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,128,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,256,0.004828799764315287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,128,0.014317867159843446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1536,128,0.004676266511281332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,64,0.0026208000878492994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,64,0.014509866635004679
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1536,32,0.0026943999032179515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1536,32,0.014471466342608133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,65536,0.049561599890391037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,16384,0.02453440030415853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,65536,0.03886186679204305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,16384,0.015705600380897522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,12288,0.019847466548283895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,16384,0.021206400791803994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,65536,0.08344000180562337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,8192,0.018361600240071614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,12288,0.013275733590126038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,12288,0.01952106753985087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,10240,0.011848533153533935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,10240,0.019356799125671387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,10240,0.017126399278640746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,8192,0.010446932911872864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,7168,0.009754666686058044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,8192,0.014808533589045205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,7168,0.017831466595331826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,7168,0.013386666774749756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,6144,0.008890666564305623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,6144,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,5120,0.008288000027338665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,6144,0.011876266201337178
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,5120,0.017833600441614784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,5120,0.01050879955291748
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,4096,0.007474133372306823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,4096,0.017127466201782227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,3072,0.016286933422088624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,3584,0.00692799985408783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,2560,0.016158933440844216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,4096,0.009274666508038838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,3584,0.01688746611277262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,3584,0.008819199601809184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,3072,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,2560,0.005966933568318685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,3072,0.007937066753705342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,2048,0.005308799942334493
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,2048,0.01586026648680369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,2560,0.007525333265463512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,1536,0.004671999812126159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,2048,0.006773333251476288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,1536,0.015458133816719056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,1024,0.003976533313592275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,1536,0.0064085334539413456
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,1024,0.01520639955997467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,768,0.003610666592915853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,768,0.014814933141072592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,1024,0.005746133128801982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,512,0.0032927999893824257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,512,0.014907733599344889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,768,0.005438933273156484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,256,0.0030762667457262674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,64,0.0030005333324273427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,64,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,256,0.014568533500035605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,512,0.0049792001644770306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,128,0.0029450667401154833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,128,0.014760532975196838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,256,0.004826666911443075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,1024,128,0.004666666686534882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,1024,32,0.0029578665892283124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,1024,32,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,65536,0.03865706523259481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,65536,0.033367466926574704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,16384,0.013142399986584983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,65536,0.08262506326039633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,16384,0.02004800041516622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,12288,0.011415466666221619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,16384,0.024423466126124064
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,12288,0.01885546644528707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,8192,0.014542933305104574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,12288,0.01946773330370585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,10240,0.01016960044701894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,10240,0.017874133586883546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,8192,0.009336533149083455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,10240,0.016897066434224447
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,8192,0.01722559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,7168,0.00863039990266164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,7168,0.0175327996412913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,6144,0.007855999966462452
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,6144,0.017198934157689413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,7168,0.01297813355922699
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,6144,0.011829333504041036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,5120,0.007276799778143566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,5120,0.01776533325513204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,5120,0.01042133371035258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,4096,0.00606826643149058
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,4096,0.01708586613337199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,3584,0.0065311998128890995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,4096,0.009212799866994222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,3584,0.016707199811935424
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,3072,0.006097066899140676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,3072,0.016371200482050575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,3584,0.008843732873598735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,2560,0.006050133208433787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,3072,0.007881600161393483
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,2560,0.016104533274968465
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,2048,0.005402666827042898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,2048,0.015843199690183003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,2560,0.007605333129564922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,1536,0.004752000172932943
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,2048,0.006843733290831248
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,1536,0.015271466970443726
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,1536,0.006427733103434245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,1024,0.003953066716591517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,1024,0.015107199549674988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,768,0.0037109332780043284
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,1024,0.005730133255322774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,768,0.015275733669598899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,768,0.005276800195376078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,512,0.0033173332611719764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,512,0.015072000026702882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,256,0.00323840007185936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,512,0.005022933085759481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,256,0.014659200112024942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,32,0.002828799933195114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,128,0.002961066613594691
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,128,0.014750933647155762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,65536,0.027561599016189577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,256,0.004716800153255462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,768,64,0.0028618666032950084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,768,128,0.004658133288224538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,64,0.014506666858990987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,768,32,0.014355199535687766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,65536,0.027345067262649535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,16384,0.010413866241772969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,65536,0.08299199740091959
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,16384,0.018708266814549766
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,16384,0.02397759954134623
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,12288,0.00904319981733958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,12288,0.016982400417327882
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,10240,0.008558932940165203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,10240,0.017953066031138103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,12288,0.01904746691385905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,8192,0.007579733431339264
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,10240,0.016794667641321818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,8192,0.017223467429478966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,7168,0.006477866570154827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,8192,0.013927466670672097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,7168,0.017627733945846557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,6144,0.006089599927266439
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,7168,0.012769066294034324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,6144,0.01722559928894043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,5120,0.0062720000743865965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,6144,0.011627733707427979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,4096,0.009155199925104777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,5120,0.01792959968249003
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,5120,0.010455466310183207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,4096,0.005859200159708659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,4096,0.01728106737136841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,3584,0.006340266764163971
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,3584,0.01691626707712809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,3584,0.008789333701133727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,3072,0.006056533257166544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,3072,0.016474666198094685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,3072,0.007973333199818928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,2560,0.006138666470845541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,2560,0.01618773341178894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,2560,0.007682133217652638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,2048,0.005470933516820272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,2048,0.016051200032234193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,1024,0.005533866584300995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,2048,0.006772266825040181
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,1536,0.00470719983180364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,1536,0.01544426679611206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,1024,0.004116266717513402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,1536,0.0063498665889104204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,1024,0.01527253290017446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,768,0.003756800045569738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,768,0.01530239979426066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,768,0.005305600166320801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,512,0.0033930666744709016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,512,0.014740266402562461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,512,0.005047466854254405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,256,0.0031648000081380212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,256,0.014575999975204468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,256,0.004794666667779287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,128,0.0028042666614055633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,128,0.014668800433476768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,64,0.00276799996693929
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,512,128,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,64,0.01441493332386017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,512,32,0.002765866617361705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,512,32,0.014542933305104574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,65536,0.016694400707880655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,65536,0.022616533438364665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,65536,0.08214720090230307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,16384,0.007353599866231282
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,10240,0.01769599914550781
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,16384,0.01754986643791199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,10240,0.01640959978103638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,16384,0.023758933941523234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,8192,0.01743040084838867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,12288,0.006244266529877981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,12288,0.017245866854985557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,10240,0.006138666470845541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,12288,0.01869973341623942
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,8192,0.006179200112819671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,7168,0.00595306654771169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,7168,0.01761066714922587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,8192,0.01386240025361379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,7168,0.01286186675230662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,4096,0.005854933460553487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,6144,0.0059114664793014525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,6144,0.017244799931844076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,5120,0.00625600020090739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,6144,0.011615999539693196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,5120,0.0179584006468455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,5120,0.010429867108662923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,4096,0.01730239987373352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,3584,0.0063733334342638654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,3584,0.016911999384562174
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,4096,0.009019733468691508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,3584,0.008727467060089112
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,3072,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,3072,0.01655893325805664
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,3072,0.007900799810886382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,2560,0.005983999868233999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,2560,0.01630506714185079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,2048,0.005297066768010458
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,2560,0.0074527998765309645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,1024,0.015081600348154704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,2048,0.01592853367328644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,1536,0.004645333190759023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,2048,0.006716800232728322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,1536,0.015319466590881348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,1536,0.006313600142796834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,1024,0.004091733445723852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,768,0.0036085332433382668
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,768,0.015117866794268289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,1024,0.005524266759554545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,512,0.003382399926582972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,768,0.005224533379077911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,512,0.014717866977055868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,512,0.005049600203831991
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,64,0.0026506667335828146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,256,0.002995199958483378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,256,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,256,0.004714666803677877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,65536,0.011377066373825073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,128,0.0028480000793933867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,65536,0.08163519700368246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,128,0.01441493332386017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,256,128,0.004654933512210846
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,64,0.014342400431632995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,256,32,0.002701866626739502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,256,32,0.014405333002408347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,12288,0.018758400281270345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,65536,0.019030400117238364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,16384,0.005965866645177205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,16384,0.017218132813771568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,16384,0.02364586591720581
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,12288,0.00573333352804184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,12288,0.017203199863433837
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,10240,0.0060032000144322716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,10240,0.017093332608540852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,10240,0.01634239951769511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,8192,0.00594346672296524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,8192,0.016801067193349204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,7168,0.005817600091298421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,8192,0.013986133535703025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,7168,0.01755946675936381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,6144,0.005580799778302511
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,4096,0.005745066702365876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,7168,0.01279146671295166
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,6144,0.017077332735061644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,5120,0.005950933198134104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,5120,0.01731200019518534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,6144,0.011495467027028401
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,5120,0.010341333349545796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,4096,0.01680213411649068
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,4096,0.009155199925104777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,3584,0.006065066655476888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,3584,0.016714666287104288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,3584,0.0086517333984375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,2048,0.005087999999523163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,3072,0.005724800129731497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,3072,0.016201600432395935
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,1536,0.004462933540344239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,3072,0.007893333335717519
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,2560,0.00565226674079895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,2560,0.015677866339683533
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,2560,0.0075445334116617845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,2048,0.015517866611480713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,768,0.0036277333895365395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,2048,0.0066431999206542965
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,1536,0.0155349334081014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,1536,0.006302933394908905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,1024,0.003869866579771042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,1024,0.015049599607785544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,512,0.005013333261013031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,256,0.0031199999153614043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,1024,0.005694933235645294
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,768,0.014819199840227762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,512,0.003403733422358831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,768,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,512,0.014962133765220643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,256,0.014435199896494546
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,128,0.0028351999819278715
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,256,0.0046623999873797095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,128,0.014133333166440328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,64,0.002712533374627431
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,2,128,128,0.004554666578769684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,64,0.014405333002408347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,128,32,0.0026346666117509207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,128,32,0.014377599954605103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,65536,0.008508800466855367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,65536,0.017965867122014364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,16384,0.005746133128801982
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,16384,0.01726186672846476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,12288,0.0056415999929110205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,12288,0.016793600718180337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,10240,0.005909333129723867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,10240,0.017166932423909508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,8192,0.005834666887919108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,5120,0.01723840037981669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,8192,0.01685333251953125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,7168,0.005815466741720835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,7168,0.017338667313257852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,6144,0.0055285334587097164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,6144,0.01689280072848002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,5120,0.005907199780146281
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,4096,0.0055285334587097164
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,2560,0.01584106683731079
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,4096,0.016617600123087564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,3584,0.00595306654771169
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,3584,0.016495999693870545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,3072,0.005773866673310598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,3072,0.01599253316720327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,2560,0.0055978665749231975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,2048,0.005046399931112925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,2048,0.01550933321317037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,1536,0.004446933170159658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,512,0.014962133765220643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,1536,0.015281066298484802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,1024,0.0037823999921480812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,1024,0.015235199530919393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,768,0.003502933432658514
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,768,0.014878933628400167
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,512,0.003268266717592875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,256,0.0030623999734719592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,256,0.01467519998550415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,128,0.0027434666951497394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,128,0.014154666662216186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,64,0.0026858667532602947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,64,0.014261333147684732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,64,32,0.0026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,64,32,0.014340266585350037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,65536,0.007703466713428498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,65536,0.018524799744288126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,16384,0.005727999905745188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,16384,0.016851200660069784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,12288,0.005657599866390228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,12288,0.016744534174601235
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,10240,0.00589333325624466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,10240,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,8192,0.005807999769846598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,8192,0.016731733083724977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,7168,0.005592533449331919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,7168,0.01702186663945516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,3584,0.005952000121275584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,6144,0.005618133147557576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,6144,0.016898133357365928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,5120,0.00591893345117569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,2560,0.005529599885145823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,5120,0.01730026602745056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,4096,0.005529599885145823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,4096,0.016846932967503867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,3584,0.016239999731381734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,3072,0.005623466769854228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,3072,0.016244266430536905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,2560,0.01578133304913839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,2048,0.004948266843954722
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,2048,0.015525333086649575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,1536,0.0044266665975252785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,1536,0.015165866414705912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,1024,0.0037845333417256674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,1024,0.015082666277885437
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,768,0.003607466568549474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,768,0.014813866217931113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,512,0.0031776001056035364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,512,0.014532267053922018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,256,0.0028778667251269023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,256,0.014482133587201438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,128,0.002726399898529053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,128,0.014285866419474283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,16384,0.3559029261271159
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,64,0.0026602665583292644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,64,0.014437333742777506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,2,32,32,0.0026357332865397137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,10240,0.47207040786743165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,12288,0.26818027496337893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,2,32,32,0.01437013347943624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,16384,0.7470165252685547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,16384,0.3894165356953939
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,12288,0.5636447906494141
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,12288,0.29620161056518557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,10240,0.2485525290171305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,8192,0.3810677210489909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,10240,0.22460907300313315
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,8192,0.20381226539611816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,8192,0.21213547388712564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,5120,0.23634986877441405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,7168,0.3284309387207031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,5120,0.12081387042999267
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,7168,0.19588267008463542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,6144,0.2838570594787598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,6144,0.15599466959635416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,7168,0.15939733187357585
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,3584,0.16642239888509114
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,4096,0.09333013693491618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,6144,0.13603307406107584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,5120,0.1321664015452067
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,4096,0.19298772811889647
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,4096,0.10794666608174641
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,3584,0.09753920237223307
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,3072,0.15089492797851561
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,3584,0.08783146540323893
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,3072,0.08577600320180258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,3072,0.07876373132069905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,2560,0.12019200325012207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,2560,0.07461120287577311
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,2560,0.06008106470108032
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,2048,0.09599680105845133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,2048,0.06215253273646036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,1536,0.07367893060048422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,2048,0.04978453318277995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,1536,0.05091520150502523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,1536,0.04394773244857788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,1024,0.049876264731089276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,1024,0.038819201787312824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,768,0.0388213316599528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,1024,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,768,0.03330986698468526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,512,0.026712532838185626
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,768,0.022050132354100548
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,512,0.026924800872802735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,512,0.01656000018119812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,64,0.0063733334342638654
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,256,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,256,0.020884267489115396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,128,0.010149332880973815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,256,0.01200213332970937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,128,0.018819200992584228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,65536,0.3555935859680176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,65536,128,0.00942186713218689
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,16384,0.11641279856363933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,64,0.018915200233459474
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,65536,32,0.004523733258247375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,12288,0.0837557315826416
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,65536,32,0.0186463991800944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,65536,0.7388160069783528
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,65536,0.37593491872151696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,16384,0.18416213989257812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,12288,0.14077439308166503
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,16384,0.09373760223388672
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,12288,0.0714794635772705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,10240,0.11601813634236653
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,10240,0.07205333709716796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,10240,0.0705450693766276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,8192,0.10973652998606365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,8192,0.06258773406346639
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,8192,0.05760426521301269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,7168,0.08228693008422852
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,7168,0.05539093414942423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,7168,0.0437663992245992
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,6144,0.07157546679178874
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,6144,0.049369601408640544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,6144,0.03855679829915364
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,5120,0.060064001878102624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,5120,0.04357973337173462
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,5120,0.03299840092658997
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,4096,0.04852480093638102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,4096,0.03799146811167399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,4096,0.027347199122111004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,3584,0.042838398615519205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,3584,0.03528853257497151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,3584,0.024922666947046916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,3072,0.03735359907150269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,3072,0.03233493367830913
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,3072,0.02190293272336324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,2048,0.016359466314315795
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,2560,0.03165439963340759
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,2560,0.029314132531483968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,2560,0.01949013272921244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,1024,0.02048106590906779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,2048,0.02569813330968221
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,2048,0.026292266448338826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,1536,0.02021226684252421
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,1536,0.023364265759785972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,1024,0.014356266458829245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,1536,0.013448533415794373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,1024,0.010359467069307963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,768,0.011447466413180033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,768,0.019143466154734293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,512,0.008552533388137818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,128,0.0034527999659379324
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,768,0.009142399827639262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,512,0.007349333167076111
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,64,0.0031637333333492277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,128,0.004882133503754934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,512,0.017459199825922648
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,256,0.005866666634877523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,256,0.015176533659299215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,16384,256,0.005283200244108836
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,128,0.014759467045466105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,64,0.014886400103569031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,16384,32,0.0030975999931494398
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,16384,32,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,65536,0.5528191884358724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,65536,0.2937408129374186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,65536,0.28335679372151695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,12288,0.06691413720448812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,16384,0.1433568000793457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,16384,0.08572800159454345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,8192,0.07449173132578532
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,12288,0.10924800237019856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,8192,0.0494762659072876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,16384,0.07416106859842936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,12288,0.06727999846140544
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,10240,0.10029866695404052
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,10240,0.06543573141098022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,10240,0.04792319933573405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,8192,0.03954773346583049
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,7168,0.06560853322347006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,7168,0.04485866626103719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,7168,0.03531200091044108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,4096,0.0369706670443217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,6144,0.05447680155436198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,6144,0.040830934047698976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,5120,0.04545493523279826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,6144,0.03132266600926717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,5120,0.03672426541646322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,3072,0.02860693335533142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,5120,0.02688960035641988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,4096,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,4096,0.02263573408126831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,3584,0.03344320058822632
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,2560,0.016250666975975037
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,3584,0.030503465731938677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,3584,0.02095680038134257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,3072,0.027710932493209838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,3072,0.018492799997329713
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,2560,0.02627519965171814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,2560,0.025460267066955568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,2048,0.021667200326919555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,2048,0.02333973248799642
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,2048,0.013398399949073792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,1536,0.016005333264668783
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,1536,0.021288534005482994
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,1536,0.011646933356920878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,1024,0.011713066697120666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,1024,0.019270400206247963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,768,0.009683199723561605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,512,0.005847466488679251
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,1024,0.009059199690818786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,768,0.018202666441599527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,512,0.0075914666056633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,768,0.008116266628106435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,512,0.015154133240381876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,256,0.004186666508515676
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,256,0.01506239970525106
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,256,0.0050570666790008545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,128,0.0035360001027584078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,128,0.015041066209475198
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,64,0.0032992000381151833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,12288,128,0.004758400221665701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,64,0.014801067113876343
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,12288,32,0.003156266609827677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,16384,0.06330026785532633
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,12288,0.09334613482157389
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,12288,32,0.014762666821479798
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,65536,0.47210880915323894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,65536,0.24601386388142904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,12288,0.04925440152486165
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,10240,0.05715946753819784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,10240,0.04756906827290853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,16384,0.12306133111317952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,65536,0.2343829313913981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,16384,0.07502079804738362
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,12288,0.05871253410975138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,10240,0.08088106314341227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,8192,0.06360533237457275
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,8192,0.04369279940923055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,7168,0.05335466861724854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,8192,0.03403306802113851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,5120,0.03319680094718933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,7168,0.044870400428771974
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,7168,0.031259733438491824
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,6144,0.04698453346888225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,4096,0.020218666394551596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,6144,0.03639359871546428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,6144,0.02696320017178853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,5120,0.03914986848831177
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,5120,0.023565866549809775
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,4096,0.03177173336346944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,4096,0.029897600412368774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,3584,0.028038400411605834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,2560,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,3584,0.027302400271097822
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,3584,0.01818880041440328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,3072,0.024614399671554564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,3072,0.02550400098164876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,2560,0.0209824005762736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,3072,0.01633280018965403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,2560,0.01454080045223236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,2048,0.017758933703104655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,2048,0.02194026708602905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,2048,0.012329600254694621
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,1536,0.013662933309872945
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,1536,0.020364799102147422
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,1024,0.010141866405804952
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,1536,0.01072746713956197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,1024,0.018403200308481853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,768,0.008409600456555684
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,1024,0.008667733271916707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,768,0.01690559983253479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,768,0.007038933535416921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,128,0.003188266605138779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,512,0.006592000027497609
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,512,0.015457066893577575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,256,0.0037237333754698435
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,512,0.0056533331672350565
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,256,0.014998400211334228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,256,0.005022933085759481
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,65536,0.37210559844970703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,128,0.014915200074513755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,10240,128,0.004820266862710317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,16384,0.09246079921722412
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,64,0.00297173336148262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,64,0.014738133549690247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,10240,32,0.0029440000653266907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,12288,0.04907626708348592
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,10240,32,0.014803199966748556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,65536,0.19877972602844238
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,65536,0.1851637363433838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,16384,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,16384,0.05092266798019409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,12288,0.07011520067850749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,10240,0.05888959964116415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,12288,0.039394132296244305
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,7168,0.03485120137532552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,10240,0.04339413245519002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,10240,0.038754133383433025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,8192,0.056309334437052404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,8192,0.03815360069274902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,8192,0.029367466767628986
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,7168,0.0421994686126709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,7168,0.025659734010696413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,6144,0.03681813478469849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,6144,0.03232746720314026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,6144,0.022533333301544188
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,5120,0.031191466252009074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,5120,0.029179733991622925
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,4096,0.025409066677093507
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,5120,0.019633066654205323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,4096,0.02598186731338501
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,4096,0.017454934120178223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,3584,0.022742400566736855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,3584,0.02478826642036438
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,3584,0.015411200126012168
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,3072,0.020080000162124634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,3072,0.02325013279914856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,2560,0.017068799336751303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,3072,0.013538133104642233
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,2560,0.021834667523701987
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,2560,0.012645333011945089
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,1024,0.008631466825803121
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,2048,0.01416213313738505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,2048,0.020467199881871543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,2048,0.010819199681282043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,768,0.015757866700490317
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,1536,0.011532800396283467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,1536,0.019059199094772338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,512,0.0151936004559199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,1536,0.00978559950987498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,1024,0.017552000284194947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,768,0.0073077330986658735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,1024,0.007156266768773396
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,768,0.006188799937566122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,512,0.005605333546797434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,256,0.003517866631348928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,512,0.005422933399677277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,256,0.014805333813031516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,256,0.005011199911435445
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,32,0.014549332857131957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,128,0.0031818665564060213
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,128,0.014667733510335287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,8192,128,0.004739200075467428
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,64,0.0029887999097506206
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,8192,64,0.014697600404421488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,8192,32,0.002867199977238973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,65536,0.3243413289388021
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,65536,0.17955946922302246
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,16384,0.08761706352233886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,16384,0.055523200829823816
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,65536,0.16175039609273273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,16384,0.04549546639124553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,12288,0.0648693323135376
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,12288,0.045697065194447835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,10240,0.05427413384119669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,12288,0.04101333220799764
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,10240,0.040888532002766924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,10240,0.030447999636332195
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,8192,0.051275734106699625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,6144,0.030679466327031453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,8192,0.03516266743342082
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,6144,0.02048106590906779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,8192,0.025891200701395674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,7168,0.03951573371887207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,7168,0.03249920010566711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,7168,0.02287893295288086
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,6144,0.034595199426015216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,5120,0.029458133379618327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,5120,0.02792533238728841
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,4096,0.023108265797297158
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,5120,0.017908267180124917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,4096,0.02493333419164022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,4096,0.01513706644376119
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,3584,0.02058666745821635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,3584,0.023986132939656575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,3584,0.01402453382809957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,3072,0.018253866831461588
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,3072,0.02257919907569885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,3072,0.012732799847920737
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,2560,0.01570026675860087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,2560,0.021262933810551964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,2048,0.012984533111254373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,2560,0.011634133259455363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,2048,0.019949867328008016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,2048,0.010182399551073711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,1536,0.0106495996316274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,1536,0.01874666611353556
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,1536,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,512,0.004422399898370107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,1024,0.007965866724650066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,1024,0.016318933169047038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,1024,0.006790400048096975
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,768,0.006818133095900218
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,768,0.015708800156911215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,128,0.003127466638882955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,768,0.006084266801675161
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,512,0.01514346698919932
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,256,0.003436800092458725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,512,0.005411200225353241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,256,0.014805333813031516
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,32,0.01451520025730133
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,256,0.005148800214131674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,128,0.014646400014559427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,7168,128,0.0048426667849222815
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,64,0.002921599894762039
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,7168,64,0.014724266529083253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,7168,32,0.002887466549873352
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,65536,0.2788149197896322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,65536,0.15514453252156574
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,65536,0.1418570677439372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,12288,0.03632533152898153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,16384,0.07400853633880615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,16384,0.05134186744689941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,16384,0.04016853173573812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,12288,0.056701866785685215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,12288,0.0417631983757019
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,10240,0.048715734481811525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,10240,0.03706026474634806
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,7168,0.020667733748753865
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,10240,0.02723840077718099
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,8192,0.040012800693511964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,8192,0.033497599760691325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,7168,0.03371946811676026
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,5120,0.02663573424021403
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,8192,0.022588799397150673
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,7168,0.031201066573460896
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,6144,0.0302346666653951
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,6144,0.028546132644017536
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,6144,0.018463999032974243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,5120,0.02582293351491292
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,5120,0.016241066654523215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,3584,0.012893866499265036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,3072,0.016966400543848674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,4096,0.021082667509714763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,3072,0.01165013313293457
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,4096,0.02422399918238322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,4096,0.014114133516947427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,3584,0.01920426686604818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,3584,0.023060266176859537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,3072,0.021708800395329794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,2560,0.014692266782124838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,2560,0.020669867595036827
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,2560,0.010805333654085796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,1024,0.007494399944941203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,2048,0.012344533205032348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,2048,0.019207467635472618
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,2048,0.009463466207186381
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,1536,0.00988159974416097
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,1536,0.018042665719985963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,1536,0.008684800068537394
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,1024,0.01541759967803955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,768,0.006316799918810527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,1024,0.006039466460545858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,768,0.015482667088508605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,768,0.005796266595522562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,512,0.004036266605059306
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,512,0.014934399724006652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,512,0.005392000079154968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,256,0.003306666761636734
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,256,0.014711466431617738
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,256,0.0050016000866889955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,128,0.0030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,128,0.014535466829935709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,6144,128,0.0046304002404212955
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,64,0.0028959999481836954
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,64,0.014562132954597472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,6144,32,0.0028192001084486645
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,6144,32,0.01440000037352244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,65536,0.23565120697021485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,65536,0.1340501308441162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,65536,0.12296319802602132
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,16384,0.06219626665115356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,16384,0.044571733474731444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,16384,0.03475093444188436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,12288,0.048153599103291825
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,12288,0.03691519896189372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,12288,0.03186560074488322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,10240,0.041094398498535155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,10240,0.033131732543309526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,10240,0.02421440084775289
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,6144,0.024753065903981526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,8192,0.033004800478617355
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,8192,0.02964693307876587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,8192,0.0202239990234375
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,7168,0.028534400463104247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,7168,0.02792853315671285
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,7168,0.01858666737874349
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,6144,0.025668267409006757
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,6144,0.01667840083440145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,5120,0.020822399854660036
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,5120,0.02418559988339742
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,4096,0.017463467518488564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,5120,0.014476799964904785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,4096,0.022267733017603555
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,4096,0.012777599692344665
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,3584,0.015589333573977151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,3584,0.021259733041127524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,3072,0.013897599776585898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,3584,0.01200213332970937
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,3072,0.02037013371785482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,3072,0.010865066448847454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,2560,0.012154666582743327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,2560,0.01949119965235392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,2048,0.01025279959042867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,2560,0.010197333494822184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,2048,0.018758400281270345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,2048,0.008891733487447102
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,1024,0.005946666498978933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,1536,0.008648533622423809
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,1536,0.017026132345199584
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,1536,0.0076341331005096436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,1024,0.0067562664548556015
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,512,0.005186133086681366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,1024,0.015190399686495461
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,768,0.005299200117588043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,768,0.01520639955997467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,256,0.005103999873002371
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,768,0.005541333556175232
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,512,0.0037813333173592886
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,5120,128,0.004705066482226053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,512,0.014980266491572062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,256,0.003209600100914637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,256,0.01474240024884542
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,128,0.002958933264017105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,128,0.014499200383822122
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,64,0.002771199991305669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,64,0.014382933576901754
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,5120,32,0.002757333219051361
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,5120,32,0.014451199769973755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,65536,0.18533226648966472
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,65536,0.10710933208465576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,16384,0.04955413341522217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,65536,0.10572799841562908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,16384,0.03816639979680379
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,16384,0.030806400378545123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,12288,0.03862293163935344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,12288,0.032979200283686325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,12288,0.028135466575622558
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,10240,0.032730666796366374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,7168,0.024972800413767496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,10240,0.030061866839726763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,10240,0.023089067141215006
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,8192,0.02717546621958415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,8192,0.026758400599161784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,8192,0.01803626616795858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,7168,0.02434879938761393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,7168,0.01649066706498464
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,6144,0.021637332439422608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,6144,0.02378666599591573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,6144,0.014808533589045205
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,3584,0.013210666179656983
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,5120,0.018702934185663857
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,5120,0.022171733776728313
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,5120,0.013238400220870972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,4096,0.015666133165359496
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,4096,0.020849066972732543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,4096,0.01157866617043813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,3584,0.020331732432047524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,3584,0.011197866996129353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,3072,0.01183786690235138
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,3072,0.01926079988479614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,3072,0.010070400436719258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,2560,0.010412800312042236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,1536,0.01532373329003652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,2560,0.01879146695137024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,2048,0.008952533205350239
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,2560,0.009669333696365356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,2048,0.017628800868988038
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,1536,0.007699200014273326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,2048,0.008120533327261608
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,1536,0.006974933544794719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,1024,0.0059232001503308615
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,1024,0.01541866660118103
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,1024,0.005836800237496694
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,256,0.0031658666829268134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,768,0.00395413339138031
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,768,0.015152000387509666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,768,0.0055861334005991616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,512,0.0035957333942254386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,512,0.014812800288200378
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,512,0.00521066685517629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,256,0.01474453310171763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,256,0.004942933221658071
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,32,0.014679466684659323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,128,0.002883200099070867
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,128,0.014626133441925048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,4096,128,0.004644266764322917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,64,0.0027232001225153605
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,4096,64,0.014351999759674073
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,16384,0.027922133604685467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,4096,32,0.002784000088771184
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,65536,0.16826559702555338
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,65536,0.09809066454569498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,16384,0.046573865413665774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,65536,0.10277547041575115
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,16384,0.035148799419403076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,12288,0.035073065757751466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,8192,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,12288,0.030679466327031453
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,10240,0.03403306802113851
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,8192,0.01738133430480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,12288,0.023937066396077476
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,10240,0.028178133567174274
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,10240,0.021192532777786256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,8192,0.024145066738128662
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,7168,0.022065067291259767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,7168,0.02383786638577779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,6144,0.019205333789189656
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,7168,0.015397333105405173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,6144,0.022576000293095908
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,6144,0.014694399634997048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,5120,0.017151999473571777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,3584,0.01959786613782247
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,5120,0.02155839999516805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,5120,0.012986666957537332
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,4096,0.013356799880663553
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,4096,0.020038400093714395
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,3584,0.011947733163833619
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,4096,0.01112320025761922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,3584,0.010973866780598958
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,3072,0.010868266224861145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,3072,0.018823466698328652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,3072,0.010014933347702027
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,2048,0.007891199986139933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,2560,0.009669333696365356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,2560,0.018422400951385497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,2560,0.008999466896057129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,2048,0.008393599589665731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,2048,0.016204800208409628
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,1024,0.0061141331990559895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,1536,0.007167999943097432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,1536,0.015243732929229736
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,512,0.0035264000296592714
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,1024,0.004795733094215393
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,1536,0.006712533533573151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,1024,0.015186132987340293
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,768,0.0038549333810806276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,768,0.01532586713631948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,768,0.0056202664971351625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,512,0.014869333306948344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,512,0.005299200117588043
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,128,0.0047541335225105286
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,64,0.0027114666998386385
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,256,0.003126399964094162
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,256,0.014670933286348978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,128,0.0028768000503381092
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3584,256,0.004946133494377137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,65536,0.08567573229471842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,65536,0.14248426755269367
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,128,0.014551466703414917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,64,0.014531200130780539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3584,32,0.0027306665976842242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3584,32,0.014436266819636025
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,16384,0.0391701340675354
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,12288,0.022502400477727256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,65536,0.08865386644999186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,10240,0.019796266158421835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,16384,0.03266026576360066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,12288,0.030957865715026855
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,16384,0.03030400077501933
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,12288,0.028382933139801024
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,10240,0.026131200790405273
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,7168,0.015316266814867655
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,10240,0.02600853244463603
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,8192,0.0213045338789622
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,8192,0.02467413345972697
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,5120,0.015099733074506124
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,8192,0.015556266903877259
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,7168,0.01950506567955017
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,4096,0.01362666686375936
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,7168,0.02337706685066223
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,6144,0.01726613243420919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,6144,0.022242132822672525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,6144,0.013867732882499696
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,5120,0.020921599864959717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,5120,0.0120170662800471
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,4096,0.019754666090011596
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,4096,0.010566400488217671
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,3072,0.009258666634559631
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,2560,0.008942932883898417
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,3584,0.012455466389656066
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,3584,0.018755199511845906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,3584,0.010497066378593444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,3072,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,3072,0.018539732694625853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,2560,0.017110399405161538
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,2560,0.00846506655216217
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,2048,0.007701333363850911
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,2048,0.016131200393040977
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,2048,0.007474133372306823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,768,0.003918933371702829
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,1536,0.0066442668437957765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,1536,0.015659733613332113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,1024,0.004439466694990794
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,1536,0.006717866659164429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,1024,0.01520639955997467
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,1024,0.005981866518656413
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,768,0.014912000298500061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,768,0.00558186670144399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,512,0.0035402665535608927
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,512,0.014735999703407287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,512,0.005067733426888784
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,256,0.003172266731659571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,256,0.01456000010172526
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,256,0.004996266464392344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,128,0.003053866575161616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,128,0.014551466703414917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,3072,128,0.004779733220736186
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,64,0.002739199995994568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,64,0.014575999975204468
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,3072,32,0.0028053333361943563
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,3072,32,0.014481066664059957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,65536,0.11872426668802898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,65536,0.07428906758626302
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,16384,0.03281919956207276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,65536,0.09333546956380209
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,16384,0.029811199506123858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,16384,0.025076266129811602
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,12288,0.026229333877563477
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,12288,0.026219733556111652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,10240,0.022462934255599976
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,7168,0.01698453426361084
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,12288,0.022629332542419434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,10240,0.02440213362375895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,10240,0.019001599152882895
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,8192,0.01881493330001831
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,8192,0.022371200720469157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,8192,0.01618346671263377
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,7168,0.0216213325659434
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,7168,0.014194132884343466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,6144,0.015165866414705912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,4096,0.019086933135986327
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,6144,0.020857600371042888
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,5120,0.013512532909711203
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,6144,0.013771733641624451
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,5120,0.020087466637293497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,4096,0.011594667037328085
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,5120,0.012456533312797547
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,3584,0.01074666678905487
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,4096,0.010891733566919963
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,3584,0.018668800592422485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,3072,0.009839999675750732
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,3584,0.010178132851918539
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,3072,0.018156800667444864
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,3072,0.009180800120035807
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,2560,0.008311466872692108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,2560,0.01672640045483907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,2560,0.008292266726493835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,1536,0.0065984000762303666
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,2048,0.007121066749095917
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,2048,0.015646933515866598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,2048,0.007144533097743988
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,1536,0.0061152001221974695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,1536,0.01572266618410746
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,1024,0.004099199920892716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,768,0.005460266768932342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,1024,0.015334399541219077
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,256,0.003091199944416682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,768,0.0038293334345022834
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,1024,0.005846400062243144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,128,0.0028704000016053517
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,768,0.015085867047309876
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,512,0.0035082665582497918
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,512,0.014957867066065469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,512,0.005217066903909048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,256,0.014628266294797262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,256,0.004954666892687479
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,128,0.014726400375366211
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2560,128,0.004625066618124644
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,64,0.002639999985694885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,64,0.014469333489735923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2560,32,0.002752000093460083
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2560,32,0.014332800110181173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,65536,0.09556480248769125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,65536,0.06062186559041342
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,65536,0.08538880348205566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,16384,0.02690560022989909
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,16384,0.02672533392906189
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,12288,0.021444267034530638
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,16384,0.025527467330296833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,12288,0.02376533349355062
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,10240,0.018758400281270345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,10240,0.02262399991353353
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,12288,0.02041920026143392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,8192,0.015611732999483744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,8192,0.021167999505996703
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,10240,0.017977599302927652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,7168,0.014227199554443359
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,8192,0.015284267067909241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,7168,0.02034986615180969
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,7168,0.01421119968096415
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,4096,0.010297600428263347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,6144,0.01274133324623108
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,4096,0.010090667009353637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,3584,0.009591466188430786
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,6144,0.01962453325589498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,6144,0.012702932953834534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,3584,0.0095360000928243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,5120,0.011643733580907185
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,5120,0.01949013272921244
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,5120,0.011755733688672384
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,2560,0.01594239970048269
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,4096,0.018551466862360637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,3584,0.017014400164286295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,3072,0.008924800157546996
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,3072,0.016292267044385276
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,3072,0.00886400043964386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,2560,0.00759680022795995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,2048,0.006540800134340922
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,2560,0.007876266539096833
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,2048,0.015804800391197204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,2048,0.006974933544794719
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,1536,0.004844800134499868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,1536,0.015543466806411744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,1536,0.006427733103434245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,1024,0.004041599979003271
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,512,0.014897066354751586
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,1024,0.01518933375676473
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,768,0.0037429332733154297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,1024,0.005553066730499268
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,768,0.014959999918937683
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,768,0.0053493330876032506
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,512,0.0034133332471052804
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,256,0.0030005333324273427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,128,0.004650666813055674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,512,0.004952533543109894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,256,0.014577066898345948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,128,0.002812800059715907
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,2048,256,0.004802133142948151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,128,0.014498133460680643
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,64,0.002726399898529053
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,64,0.014537599682807923
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,2048,32,0.002630399912595749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,2048,32,0.014642133315404256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,65536,0.07316799958546957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,65536,0.05006613334019979
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,65536,0.08287146886189779
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,12288,0.019732266664505005
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,10240,0.015657599767049155
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,16384,0.021689599752426146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,16384,0.02407360076904297
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,16384,0.024629332621892295
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,12288,0.017552000284194947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,12288,0.022013866901397706
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,10240,0.021090133984883627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,8192,0.01317759950955709
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,6144,0.011052800218264262
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,7168,0.01365013321240743
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,10240,0.01739733417828878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,8192,0.019871999820073448
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,8192,0.01487573285897573
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,7168,0.012158933281898498
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,7168,0.019250132640202842
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,6144,0.01872319976488749
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,6144,0.012389333049456278
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,4096,0.009308800101280212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,5120,0.010126933455467224
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,5120,0.01851946711540222
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,5120,0.011082667112350463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,4096,0.008906666437784832
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,4096,0.01689280072848002
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,2560,0.006683733562628429
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,3584,0.00851093331972758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,3584,0.016951467593510947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,3072,0.007982933521270752
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,3584,0.008849066495895386
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,3072,0.01632426679134369
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,3072,0.007990399996439617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,2560,0.01606186628341675
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,2560,0.0075445334116617845
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,2048,0.005589333176612854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,1536,0.006364800035953522
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,2048,0.015727999806404113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,2048,0.006727466483910878
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,1536,0.004710400104522705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,1536,0.015543466806411744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,1024,0.004031999905904134
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,1024,0.01523413360118866
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,1024,0.005689600110054016
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,768,0.003643733263015747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,256,0.01452906628449758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,768,0.014865066607793173
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,256,0.004761599997679392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,768,0.005356800059477488
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,512,0.0033759998778502146
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,512,0.014723199605941772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,512,0.0050901333491007485
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,256,0.00306986669699351
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,128,0.0028575999041398365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,128,0.014388266205787658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1536,128,0.0047189335028330484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,65536,0.038441598415374756
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,16384,0.01575040022532145
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,64,0.0026890667776266735
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,64,0.014470400412877402
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,12288,0.013182933131853739
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1536,32,0.0027200000981489818
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1536,32,0.014425599575042724
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,65536,0.04921600023905436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,65536,0.08257173697153727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,16384,0.02145706613858541
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,16384,0.02452053427696228
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,8192,0.018339200814565023
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,12288,0.019679999351501463
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,12288,0.01967039903004964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,7168,0.018066134055455527
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,10240,0.011801600456237793
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,10240,0.0191210667292277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,8192,0.010371200243631999
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,10240,0.017132800817489625
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,8192,0.014590932925542196
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,7168,0.009756799538930256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,7168,0.013273599743843078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,6144,0.008789333701133727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,6144,0.017271467049916587
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,6144,0.011785599589347839
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,5120,0.008291199803352356
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,5120,0.017925333976745606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,4096,0.007454933226108551
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,5120,0.01034986674785614
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,4096,0.01723519961039225
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,3584,0.00682773341735204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,4096,0.009223467111587525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,3584,0.016754132509231568
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,3584,0.008769067128499348
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,3072,0.006390400230884552
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,3072,0.016328533490498863
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,3072,0.00803413341442744
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,2560,0.006002133091290792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,2560,0.01613866686820984
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,2048,0.005376000205675761
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,2560,0.007492266595363617
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,2048,0.015675733486811318
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,1536,0.0047658666968345646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,768,0.0035978667438030243
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,1024,0.005643733342488607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,768,0.014787200093269347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,2048,0.0068245331446329755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,1536,0.015362133582433065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,1536,0.006277333199977875
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,1024,0.004013866682847341
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,1024,0.015118933717409768
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,768,0.005333333214124044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,512,0.0033610666791598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,512,0.014664533734321594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,256,0.003053866575161616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,128,0.004721066852410635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,512,0.005021866659323374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,256,0.01456106702486674
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,128,0.0027637332677841187
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,1024,256,0.004863999783992767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,128,0.014670933286348978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,64,0.0030165334542592366
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,64,0.0145578662554423
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,1024,32,0.0030016000072161358
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,1024,32,0.014934399724006652
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,65536,0.038447999954223634
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,65536,0.032985599835713704
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,16384,0.013172266880671182
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,65536,0.08247360388437906
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,16384,0.019911466042200725
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,12288,0.019469867149988808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,12288,0.011422933141390482
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,8192,0.009131733576456707
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,12288,0.018705066045125326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,7168,0.008473599950472515
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,16384,0.02453440030415853
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,10240,0.010309333602587383
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,10240,0.017825067043304443
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,8192,0.017460266749064125
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,10240,0.017095466454823814
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,8192,0.014410666624704995
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,7168,0.017578667402267455
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,6144,0.007753600180149078
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,7168,0.013031466801961263
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,6144,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,6144,0.011648000280062357
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,5120,0.007187200089295705
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,5120,0.01776533325513204
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,4096,0.006123733520507812
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,5120,0.01048533320426941
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,4096,0.017012266318003337
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,3584,0.0064735998709996535
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,3584,0.01684266726175944
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,4096,0.009148800373077392
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,3072,0.00597866674264272
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,3072,0.016428800423940022
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,3584,0.008836266398429871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,2560,0.007464533547560374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,2560,0.006071466704209646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,2560,0.01615466674168905
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,3072,0.007945600152015685
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,2048,0.005341866612434387
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,2048,0.015924266974131265
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,2048,0.006820266445477803
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,1536,0.004677333434422811
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,1536,0.015525333086649575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,1536,0.006247466802597046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,1024,0.0039711999396483105
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,1024,0.015054933230082192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,768,0.003621333340803782
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,256,0.0031744000812371576
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,1024,0.005566933254400889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,256,0.014728533228238425
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,128,0.0029535998900731405
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,768,0.01520853340625763
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,768,0.005335466563701629
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,512,0.003289599965016047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,512,0.014852266510327658
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,512,0.00492799977461497
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,256,0.004806399842103322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,128,0.01447466711203257
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,768,128,0.004695466657479604
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,64,0.0028362666567166646
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,64,0.014492799838383993
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,768,32,0.0028064000109831494
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,768,32,0.014356266458829245
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,65536,0.027203200260798137
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,65536,0.027194666862487792
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,16384,0.01037440001964569
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,65536,0.08231573104858399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,16384,0.019233065843582153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,12288,0.00905386706193288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,12288,0.01741973360379537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,16384,0.023811199267705283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,10240,0.008601599931716919
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,7168,0.006646400193373363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,12288,0.019332265853881835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,10240,0.017100799083709716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,8192,0.00766186664501826
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,6144,0.01737066706021627
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,7168,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,10240,0.01637440025806427
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,8192,0.017100799083709716
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,7168,0.017591466506322227
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,4096,0.0058442667126655575
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,8192,0.013974400361378989
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,4096,0.0171615997950236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,6144,0.006140799820423126
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,6144,0.011542399724324543
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,5120,0.006259199976921081
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,3072,0.006025599936644236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,5120,0.01803413430849711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,5120,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,4096,0.00913813312848409
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,3584,0.006344533463319142
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,3584,0.016826667388280234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,3584,0.008842666943868
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,3072,0.01651306649049123
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,3072,0.007970133423805236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,2560,0.007590400179227193
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,1536,0.004839466512203216
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,2560,0.006200533111890157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,1024,0.004260266820589701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,2560,0.016307199994723
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,2048,0.005463466544946035
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,2048,0.015868799885114034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,1536,0.015726932883262636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,1536,0.006222933530807495
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,512,0.0034453332424163817
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,2048,0.006650666892528534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,1024,0.015198933084805808
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,768,0.003704533229271571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,1024,0.005532800157864889
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,768,0.015024000406265258
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,768,0.005392000079154968
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,512,0.014811733365058899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,512,0.004985600213209788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,256,0.0030879999200503034
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,256,0.014724266529083253
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,128,0.002846933404604594
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,128,0.014447999993960061
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,256,0.004692266881465912
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,64,0.002647466709216436
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,512,128,0.0045962666471799215
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,64,0.014600533246994018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,512,32,0.0027402666707833606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,512,32,0.014797866344451904
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,65536,0.01651093363761902
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,65536,0.022487467527389525
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,16384,0.0072405333320299785
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,16384,0.017369600137074788
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,65536,0.08177066644032796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,12288,0.006217599908510844
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,12288,0.017219199736913045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,16384,0.023592533667882283
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,10240,0.00629013329744339
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,10240,0.017195733388264973
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,12288,0.018747733036677042
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,8192,0.0061152001221974695
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,10240,0.016260266304016113
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,8192,0.01704533298810323
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,8192,0.013770666718482972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,7168,0.005971199770768484
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,7168,0.017504000663757326
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,7168,0.01276479959487915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,6144,0.005798399945100148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,5120,0.01034346620241801
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,6144,0.017179733514785765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,3584,0.006321066617965698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,3584,0.016883200407028197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,5120,0.006247466802597046
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,5120,0.017918932437896728
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,6144,0.011476266384124755
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,4096,0.005890133480230967
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,4096,0.017122133572896322
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,3584,0.008689066767692566
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,4096,0.009213866790135701
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,2048,0.005378133555253347
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,3072,0.0058378666639328
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,3072,0.01662613352139791
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,3072,0.007869866490364075
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,2560,0.006044800082842508
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,2560,0.016179200013478598
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,2048,0.015884799758593242
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,2560,0.007438933352629344
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,1536,0.0046965335806210835
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,1536,0.015518933534622192
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,2048,0.006649599969387054
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,1024,0.004026666780312856
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,1024,0.015090133746465048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,768,0.0036864000062147772
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,768,0.005298133194446564
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,1536,0.006142933170000712
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,768,0.014920533696810404
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,1024,0.005554133156935374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,512,0.0033600000043710076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,512,0.014708266655604044
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,512,0.004941866795221964
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,256,0.0030570665995279947
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,256,0.014520532886187234
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,128,0.004714666803677877
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,128,0.0028575999041398365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,128,0.014376533031463624
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,64,0.002696533252795537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,256,256,0.004709333181381226
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,64,0.01439466675122579
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,256,32,0.002609066665172577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,256,32,0.014418133099873862
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,65536,0.011322666207949321
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,65536,0.019011199474334717
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,65536,0.08207680384318033
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,16384,0.0058112000425656635
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,16384,0.017398399114608765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,16384,0.02379946708679199
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,12288,0.005842133363087972
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,12288,0.01703146696090698
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,10240,0.005919999877611796
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,12288,0.018687999248504637
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,10240,0.017191465695699057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,8192,0.006019199887911478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,10240,0.016242133577664693
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,8192,0.016804265975952148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,8192,0.0139957328637441
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,5120,0.005977599819501241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,7168,0.005846400062243144
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,7168,0.017412267128626504
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,7168,0.01274773379166921
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,6144,0.0056309332450230915
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,6144,0.016960000991821288
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,5120,0.017448532581329345
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,6144,0.01156160036722819
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,3584,0.008758399883906047
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,4096,0.005520000060399374
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,4096,0.016790399948755898
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,5120,0.010246400038401287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,3584,0.005994666616121928
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,3584,0.01649493376413981
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,4096,0.009078400333722432
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,3072,0.005776000022888183
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,3072,0.01618773341178894
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,2560,0.005672533313433329
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,2560,0.01596799989541372
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,3072,0.007880533238252004
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,2048,0.005166933437188466
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,2048,0.015726932883262636
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,2048,0.006679466863473256
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,2560,0.007499733567237854
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,1536,0.004555733501911163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,1536,0.015589333573977151
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,1024,0.0038304001092910765
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,1024,0.015064533551534018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,1536,0.006164266665776571
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,768,0.003664000084002813
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,1024,0.0060928001999855045
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,512,0.00490880012512207
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,256,0.01442026694615682
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,768,0.01495680014292399
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,512,0.0033600000043710076
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,512,0.014682666460673014
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,768,0.005178666611512502
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,256,0.003013333429892858
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,128,0.002791466563940048
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,128,0.0142794668674469
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,65536,0.008438400427500407
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,256,0.004796800017356872
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,64,0.0026047999660174055
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,64,0.01439573367436727
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,128,32,0.002644266684850057
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8_block,1,128,128,0.00459199994802475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,128,32,0.014338133732477823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,65536,0.017656532923380534
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,16384,0.005793066819508871
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,16384,0.017409066359202065
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,12288,0.005643733342488607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,12288,0.016612266500790916
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,10240,0.005892266829808554
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,10240,0.017223467429478966
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,8192,0.005762133498986562
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,8192,0.016872533162434897
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,7168,0.005688533186912537
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,7168,0.01738133430480957
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,6144,0.005548800031344095
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,6144,0.01680533289909363
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,5120,0.005950933198134104
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,5120,0.017483733097712197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,4096,0.005513600011666616
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,4096,0.016664533813794454
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,3584,0.006027733286221823
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,3584,0.01658453345298767
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,1536,0.004421333471934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,3072,0.005628799895445505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,3072,0.016037333011627197
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,2560,0.005678933362166087
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,768,0.003416533271471659
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,768,0.014667733510335287
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,2560,0.015961600343386333
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,2048,0.004994133114814758
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,2048,0.01527253290017446
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,1536,0.015343999862670899
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,1024,0.0037759999434153237
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,1024,0.01479039986928304
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,512,0.003156266609827677
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,512,0.014750933647155762
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,256,0.0030250666042168934
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,256,0.014602667093276978
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,128,0.002771199991305669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,128,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,64,0.0027029333015282948
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,64,0.014461867014567056
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,64,32,0.002657066782315572
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,64,32,0.014460800091425577
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,65536,0.007987200220425924
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,65536,0.01803413430849711
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,16384,0.005634133517742157
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,16384,0.017237333456675212
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,12288,0.005789866546789805
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,12288,0.016838399569193523
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,10240,0.005805866420269012
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,10240,0.01723840037981669
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,8192,0.00572266678015391
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,8192,0.016687999169031777
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,7168,0.00565119981765747
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,7168,0.01723626653353373
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,6144,0.005530666808287303
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,6144,0.016809600591659545
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,5120,0.005891199906667074
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,5120,0.017403733730316163
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,2560,0.005603200197219849
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,4096,0.005493333439032236
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,4096,0.01650879979133606
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,3584,0.0059125334024429325
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,3584,0.016327466567357382
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,3072,0.005594666798909505
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,3072,0.01599360009034475
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,2560,0.01590933303038279
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,2048,0.005048533280690512
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,2048,0.015745066603024802
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,1536,0.00444160004456838
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,1536,0.015382400155067444
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,1024,0.0037920000652472176
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,1024,0.014827733238538107
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,768,0.003483733286460241
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,768,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,512,0.003256533294916153
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,512,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,256,0.0028575999041398365
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,32,0.014339199662208557
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,256,0.014532267053922018
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,128,0.00275093341867129
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,128,0.014293332894643148
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,64,0.00264533335963885
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,fp8,1,32,64,0.01432213286558787
TRTLLM,1.2.0rc5,NVIDIA H100 80GB HBM3,gemm,torch_flow,float16,1,32,32,0.0026709333062171934
